# Load the thyroid carcinoma data. The rest of the script relies on three
# objects restored from this file: raw_counts_df, c_anno_df and r_anno_df.
load('./files/Thyroid_carcinoma.RData')
# Fail early, with a clear message, if the file did not provide them.
stopifnot(
  exists('raw_counts_df'),
  exists('c_anno_df'),
  exists('r_anno_df')
)
# View() opens a spreadsheet-style viewer, which is only useful (and only
# guaranteed to be available) in an interactive session, so the visual
# inspection is skipped in non-interactive runs.
if (interactive()) {
  View(raw_counts_df)
  View(c_anno_df)
  View(r_anno_df)
}
library(biomaRt)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.1 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.4.1
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::select() masks biomaRt::select()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidyverse)
First, we display the available BioMart databases:
# List the BioMart databases that can be connected to.
biomaRt::listMarts()
## biomart version
## 1 ENSEMBL_MART_ENSEMBL Ensembl Genes 109
## 2 ENSEMBL_MART_MOUSE Mouse strains 109
## 3 ENSEMBL_MART_SNP Ensembl Variation 109
## 4 ENSEMBL_MART_FUNCGEN Ensembl Regulation 109
The function useMart enables us to select a biomart database to use and the associated datasets. We select those with the parameters biomart and dataset respectively. The object returned is a Mart Class and can be seen as a connection to the Biomart database selected.
# Connect to the Ensembl mart and select the human gene dataset.
ensembl <- biomaRt::useMart(
  biomart = 'ensembl',
  dataset = 'hsapiens_gene_ensembl'
)
Using the biomaRt library we can query the BioMart database with a set of filters and their corresponding values. The attributes we want to retrieve must be specified; here they are ensembl_gene_id, external_gene_name and gene_biotype. Specifically, the query is filtered on the ensembl_gene_id values of our data frame r_anno_df, while gene_biotype carries the information we need, i.e. whether each gene is protein-coding or not.
# Gene IDs to look up: the Ensembl gene IDs listed in the row annotation.
filtering <- r_anno_df[['ensembl_gene_id']]
# Ask Ensembl for the ID, the gene symbol and the biotype of those genes.
query <- biomaRt::getBM(
  attributes = c('ensembl_gene_id', 'external_gene_name', 'gene_biotype'),
  filters = 'ensembl_gene_id',
  values = list(filtering),
  mart = ensembl
)
Here we filter the query result, keeping only the protein-coding genes.
# Keep the rows of the BioMart result whose biotype is protein_coding.
# dplyr::filter is spelled out because it masks stats::filter.
query_protein_coding <- dplyr::filter(
  query,
  gene_biotype == 'protein_coding'
)
After that, we filter both r_anno_df and raw_counts_df, keeping only the genes whose ensembl_gene_id appears in the protein-coding query result.
# Gene annotation restricted to the protein-coding genes (matched on the
# ensembl_gene_id column).
r_anno_df_pro_cod <- dplyr::filter(
  r_anno_df,
  ensembl_gene_id %in% query_protein_coding$ensembl_gene_id
)
# Raw counts restricted to the same genes (matched on the row names).
raw_count_df_pro_cod <- raw_counts_df[
  rownames(raw_counts_df) %in% query_protein_coding$ensembl_gene_id,
]
library(edgeR)
## Loading required package: limma
library(fgsea)
Here we set the thresholds to remove genes with low signals, this is needed because those genes have low statistical power and give us no information.
# Minimum raw count: a sample supports a gene when its raw count is >= 20
# (the comparison used when filter_vec is computed is `>=`, not `>`).
count_thr <- 20
# Minimum number of supporting samples a gene needs in at least one condition
# group (case or control) to be retained.
repl_thr <- 5
For each gene we count how many replicates in each group satisfy the count threshold. This step is needed to apply the first threshold and, afterwards, eliminate the rows which don’t satisfy it. We create filter_vec, a vector with one entry per gene holding the largest number of samples, across the condition groups, that have a raw count >= 20.
# For every gene (row): count, within each condition group, the samples whose
# raw count reaches count_thr, then keep the largest of those group counts.
# NOTE(review): assumes the columns of raw_count_df_pro_cod are in the same
# order as the rows of c_anno_df - confirm.
filter_vec <- apply(
  raw_count_df_pro_cod,
  MARGIN = 1,
  FUN = function(gene_counts) {
    samples_per_group <- by(
      gene_counts,
      c_anno_df$condition,
      function(group_counts) sum(group_counts >= count_thr)
    )
    max(samples_per_group)
  }
)
# Distribution of the per-gene values.
table(filter_vec)
## filter_vec
## 0 1 2 3 4 5 6 7 8 9 10 11 12
## 4665 339 172 129 106 80 66 75 50 57 50 54 40
## 13 14 15 16 17 18 19 20 21 22 23 24 25
## 49 43 38 42 39 38 37 36 32 39 34 30 33
## 26 27 28 29 30 31 32 33 34 35 36 37 38
## 42 41 50 33 34 38 41 37 35 35 43 46 34
## 39 40 41 42 43 44 45 46 47 48 49 50
## 53 52 64 64 80 78 103 146 161 236 517 13723
Then we create a new dataframe filtered based on the filter_vec with a threshold on the replicates
# Keep the genes for which at least repl_thr samples of one condition group
# reach the count threshold.
filter_count_df <- raw_count_df_pro_cod[filter_vec >= repl_thr,]
dim(filter_count_df) # number of rows = number of genes that pass the filter
## [1] 16748 100
Update the gene annotation using the filter. The gene annotation file must be consistent with the data files we are using.
# Align the gene annotation with the filtered counts. Rows are looked up
# through the ensembl_gene_id column rather than through the row names of
# r_anno_df_pro_cod, so the result does not depend on the annotation table
# carrying gene IDs as row names (a row-name lookup silently returns all-NA
# rows when it does not).
filter_anno_df <- local({
  anno_rows <- match(
    rownames(filter_count_df),
    r_anno_df_pro_cod$ensembl_gene_id
  )
  # The annotation must cover every retained gene; stop instead of passing
  # NA rows on to the downstream analysis.
  if (anyNA(anno_rows)) {
    stop(
      sum(is.na(anno_rows)),
      ' filtered gene(s) have no entry in r_anno_df_pro_cod',
      call. = FALSE
    )
  }
  r_anno_df_pro_cod[anno_rows, ]
})
dim(filter_anno_df) # the number of rows must equal that of filter_count_df
## [1] 16748 3
Now we can proceed with the DEG analysis using edgeR. First, we create a DGEList object using the function DGEList. This object contains all the information needed for the next steps: counts (= the filtered transcript counts), group (= the condition of each sample, case or control), samples (= the sample annotation) and genes (= the gene annotation).
# Bundle the filtered counts, the condition of each sample, the sample
# annotation and the gene annotation into one DGEList object.
edge_c <- edgeR::DGEList(
  counts = filter_count_df,
  group = c_anno_df$condition,
  samples = c_anno_df,
  genes = filter_anno_df
)
# Print the object to inspect its components.
edge_c
## An object of class "DGEList"
## $counts
## TCGA-DJ-A2Q1-01A TCGA-H2-A2K9-11A TCGA-H2-A3RI-11A
## ENSG00000187634 1385 596 728
## ENSG00000188976 4076 4114 3988
## ENSG00000187961 285 226 243
## ENSG00000187583 650 54 37
## ENSG00000187642 570 22 12
## TCGA-DJ-A4V4-01A TCGA-EL-A3T9-01A TCGA-BJ-A0ZB-01A
## ENSG00000187634 1336 1173 1071
## ENSG00000188976 4114 5712 5383
## ENSG00000187961 338 623 240
## ENSG00000187583 557 931 489
## ENSG00000187642 298 324 181
## TCGA-EL-A3H1-01A TCGA-H2-A3RI-01A TCGA-EL-A3GR-01A
## ENSG00000187634 856 2180 825
## ENSG00000188976 4703 6086 3074
## ENSG00000187961 248 501 294
## ENSG00000187583 53 746 567
## ENSG00000187642 12 236 143
## TCGA-DJ-A3UN-01A TCGA-DO-A1JZ-01A TCGA-EL-A3GU-01A
## ENSG00000187634 1358 2147 729
## ENSG00000188976 5933 5130 3542
## ENSG00000187961 408 314 282
## ENSG00000187583 767 331 536
## ENSG00000187642 325 107 163
## TCGA-DJ-A1QM-01A TCGA-EM-A1CT-11A TCGA-EL-A3CZ-01A
## ENSG00000187634 1427 823 330
## ENSG00000188976 5293 4467 2769
## ENSG00000187961 288 145 133
## ENSG00000187583 428 24 105
## ENSG00000187642 146 17 30
## TCGA-EL-A3ZS-11A TCGA-J8-A3YE-01A TCGA-ET-A2N4-01A
## ENSG00000187634 712 833 2508
## ENSG00000188976 1946 4416 6531
## ENSG00000187961 195 613 394
## ENSG00000187583 40 526 967
## ENSG00000187642 35 156 416
## TCGA-EM-A4FO-01A TCGA-EL-A3ZO-11A TCGA-ET-A4KN-01A
## ENSG00000187634 1389 847 4158
## ENSG00000188976 4618 3365 4921
## ENSG00000187961 519 202 132
## ENSG00000187583 545 49 396
## ENSG00000187642 277 26 569
## TCGA-EL-A3T1-11A TCGA-ET-A2N5-11B TCGA-BJ-A2N8-11A
## ENSG00000187634 756 777 947
## ENSG00000188976 3407 5134 6688
## ENSG00000187961 169 197 190
## ENSG00000187583 69 7 87
## ENSG00000187642 33 9 29
## TCGA-FY-A3I4-01A TCGA-EL-A3ZQ-11A TCGA-DJ-A2Q2-01A
## ENSG00000187634 2044 902 885
## ENSG00000188976 6328 4849 5436
## ENSG00000187961 372 413 242
## ENSG00000187583 1059 55 153
## ENSG00000187642 355 25 51
## TCGA-EL-A3ZP-11A TCGA-EL-A3GQ-01A TCGA-EL-A3TA-11A
## ENSG00000187634 536 2009 838
## ENSG00000188976 3141 4217 5328
## ENSG00000187961 241 309 226
## ENSG00000187583 59 373 23
## ENSG00000187642 20 129 16
## TCGA-CE-A481-01A TCGA-E8-A438-01A TCGA-EL-A3H7-11A
## ENSG00000187634 828 3638 779
## ENSG00000188976 4236 5774 4484
## ENSG00000187961 430 733 188
## ENSG00000187583 323 1190 73
## ENSG00000187642 133 673 31
## TCGA-EL-A3GZ-11A TCGA-E8-A413-01A TCGA-BJ-A28X-11A
## ENSG00000187634 518 1230 834
## ENSG00000188976 3293 4564 4379
## ENSG00000187961 100 431 527
## ENSG00000187583 12 611 116
## ENSG00000187642 6 188 27
## TCGA-EM-A2CS-01A TCGA-ET-A3BX-01A TCGA-DJ-A13V-01A
## ENSG00000187634 1512 2770 1882
## ENSG00000188976 5074 5293 4500
## ENSG00000187961 249 310 229
## ENSG00000187583 279 1276 688
## ENSG00000187642 92 457 250
## TCGA-KS-A41I-11A TCGA-E8-A44K-01A TCGA-ET-A3DW-11A
## ENSG00000187634 639 601 672
## ENSG00000188976 3641 3296 3835
## ENSG00000187961 353 359 125
## ENSG00000187583 110 442 52
## ENSG00000187642 44 155 19
## TCGA-EL-A3ZL-11A TCGA-EL-A4JV-01A TCGA-FE-A231-01A
## ENSG00000187634 665 711 586
## ENSG00000188976 4600 3679 5244
## ENSG00000187961 207 252 284
## ENSG00000187583 47 136 713
## ENSG00000187642 29 41 209
## TCGA-BJ-A28R-11A TCGA-EM-A1CS-11A TCGA-FE-A3PA-01A
## ENSG00000187634 623 878 1346
## ENSG00000188976 3382 4261 4057
## ENSG00000187961 169 148 763
## ENSG00000187583 48 22 716
## ENSG00000187642 13 14 633
## TCGA-EM-A2CQ-01A TCGA-EL-A3TB-11A TCGA-BJ-A28W-11A
## ENSG00000187634 741 987 624
## ENSG00000188976 5195 4640 2636
## ENSG00000187961 158 127 128
## ENSG00000187583 177 9 129
## ENSG00000187642 49 7 76
## TCGA-KS-A41J-11A TCGA-EL-A3T2-11A TCGA-KS-A41L-11A
## ENSG00000187634 590 572 736
## ENSG00000188976 4202 4169 4679
## ENSG00000187961 306 206 375
## ENSG00000187583 53 18 28
## ENSG00000187642 13 22 17
## TCGA-EL-A3T0-11A TCGA-DJ-A3UP-01A TCGA-DJ-A4V0-01A
## ENSG00000187634 616 772 962
## ENSG00000188976 3781 4261 3834
## ENSG00000187961 193 315 404
## ENSG00000187583 23 350 258
## ENSG00000187642 12 122 79
## TCGA-EL-A3ZK-11A TCGA-BJ-A3PT-01A TCGA-EM-A3OA-01A
## ENSG00000187634 980 2430 1895
## ENSG00000188976 3483 4515 7998
## ENSG00000187961 328 229 577
## ENSG00000187583 118 276 156
## ENSG00000187642 47 156 47
## TCGA-FY-A3TY-11A TCGA-EL-A3MY-11A TCGA-FY-A3R9-01A
## ENSG00000187634 507 1034 937
## ENSG00000188976 3192 3762 3901
## ENSG00000187961 244 189 182
## ENSG00000187583 15 45 78
## ENSG00000187642 11 14 30
## TCGA-BJ-A3PR-11A TCGA-EL-A3H2-11A TCGA-ET-A2MY-01A
## ENSG00000187634 2733 673 1765
## ENSG00000188976 5395 4086 5239
## ENSG00000187961 291 123 206
## ENSG00000187583 62 20 509
## ENSG00000187642 71 4 209
## TCGA-ET-A3DP-11A TCGA-L6-A4ET-01A TCGA-BJ-A3PU-11A
## ENSG00000187634 643 3035 1017
## ENSG00000188976 4576 6615 5129
## ENSG00000187961 58 651 240
## ENSG00000187583 13 1070 38
## ENSG00000187642 5 366 18
## TCGA-DO-A2HM-01B TCGA-E8-A2JQ-11A TCGA-EL-A3ZT-11A
## ENSG00000187634 592 644 424
## ENSG00000188976 3982 3262 3511
## ENSG00000187961 480 233 259
## ENSG00000187583 413 73 23
## ENSG00000187642 132 28 8
## TCGA-EL-A3MW-11A TCGA-BJ-A28T-01A TCGA-DO-A1JZ-11A
## ENSG00000187634 676 293 469
## ENSG00000188976 3402 1600 3863
## ENSG00000187961 109 87 95
## ENSG00000187583 21 71 15
## ENSG00000187642 4 25 3
## TCGA-EL-A3N3-11A TCGA-BJ-A290-11A TCGA-E8-A242-01A
## ENSG00000187634 723 812 2043
## ENSG00000188976 4296 4258 5579
## ENSG00000187961 201 149 303
## ENSG00000187583 40 25 1434
## ENSG00000187642 18 6 497
## TCGA-ET-A39L-01A TCGA-EL-A3MY-01A TCGA-EL-A3T8-11A
## ENSG00000187634 1026 1248 856
## ENSG00000188976 5111 4227 3813
## ENSG00000187961 188 336 283
## ENSG00000187583 264 991 69
## ENSG00000187642 104 337 39
## TCGA-EM-A1CV-11A TCGA-EM-A1CW-11A TCGA-EL-A3N2-11A
## ENSG00000187634 702 169 517
## ENSG00000188976 4533 1821 3685
## ENSG00000187961 148 43 155
## ENSG00000187583 28 7 40
## ENSG00000187642 11 0 18
## TCGA-EL-A3T3-11A TCGA-BJ-A45G-01A TCGA-EL-A3CY-01A
## ENSG00000187634 1016 842 987
## ENSG00000188976 4499 4031 4132
## ENSG00000187961 257 256 383
## ENSG00000187583 22 311 301
## ENSG00000187642 14 121 111
## TCGA-EL-A3ZR-11A TCGA-EL-A3T7-11A TCGA-CE-A3ME-01A
## ENSG00000187634 625 600 1428
## ENSG00000188976 3827 3951 5277
## ENSG00000187961 304 272 320
## ENSG00000187583 109 32 378
## ENSG00000187642 250 26 154
## TCGA-EL-A3T6-11A TCGA-KS-A4ID-01A TCGA-ET-A3BP-01A
## ENSG00000187634 1130 867 1631
## ENSG00000188976 4429 3260 4683
## ENSG00000187961 307 241 275
## ENSG00000187583 96 444 466
## ENSG00000187642 27 197 232
## TCGA-EL-A3ZG-11A TCGA-ET-A2MX-11C TCGA-BJ-A18Z-01A
## ENSG00000187634 668 765 718
## ENSG00000188976 2450 3985 2503
## ENSG00000187961 122 204 286
## ENSG00000187583 8 32 549
## ENSG00000187642 8 13 245
## TCGA-GE-A2C6-11A TCGA-BJ-A2N9-11A TCGA-ET-A39N-01A
## ENSG00000187634 368 867 1338
## ENSG00000188976 2513 4565 5517
## ENSG00000187961 125 248 163
## ENSG00000187583 34 75 94
## ENSG00000187642 22 26 55
## TCGA-DJ-A2QC-01A
## ENSG00000187634 3174
## ENSG00000188976 6177
## ENSG00000187961 312
## ENSG00000187583 610
## ENSG00000187642 346
## 16743 more rows ...
##
## $samples
## group lib.size norm.factors sample condition
## TCGA-DJ-A2Q1-01A case 68063260 1 TCGA-DJ-A2Q1-01A case
## TCGA-H2-A2K9-11A control 79992173 1 TCGA-H2-A2K9-11A control
## TCGA-H2-A3RI-11A control 76205032 1 TCGA-H2-A3RI-11A control
## TCGA-DJ-A4V4-01A case 57746743 1 TCGA-DJ-A4V4-01A case
## TCGA-EL-A3T9-01A case 69959466 1 TCGA-EL-A3T9-01A case
## 95 more rows ...
##
## $genes
## ensembl_gene_id external_gene_name length
## ENSG00000187634 ENSG00000187634 SAMD11 20652
## ENSG00000188976 ENSG00000188976 NOC2L 15106
## ENSG00000187961 ENSG00000187961 KLHL17 5135
## ENSG00000187583 ENSG00000187583 PLEKHN1 9383
## ENSG00000187642 ENSG00000187642 PERM1 6919
## 16743 more rows ...
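Normalization using calcNormFactors and the TMM (trimmed mean of M-values) method. This method allows us to perform intra- and inter-sample normalization of the data. A scaling factor is calculated for each library and is used together with the library size; with the TMM method the factors are chosen so that the trimmed means of the log fold-changes between samples converge, under the assumption that most genes are not differentially expressed.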
# Compute the TMM normalization factors.
edge_n <- edgeR::calcNormFactors(edge_c, method = 'TMM')
# Same content as edge_c, except that the norm.factors column of $samples
# (1 for every sample in edge_c) now holds the TMM factors.
edge_n
## An object of class "DGEList"
## $counts
## TCGA-DJ-A2Q1-01A TCGA-H2-A2K9-11A TCGA-H2-A3RI-11A
## ENSG00000187634 1385 596 728
## ENSG00000188976 4076 4114 3988
## ENSG00000187961 285 226 243
## ENSG00000187583 650 54 37
## ENSG00000187642 570 22 12
## TCGA-DJ-A4V4-01A TCGA-EL-A3T9-01A TCGA-BJ-A0ZB-01A
## ENSG00000187634 1336 1173 1071
## ENSG00000188976 4114 5712 5383
## ENSG00000187961 338 623 240
## ENSG00000187583 557 931 489
## ENSG00000187642 298 324 181
## TCGA-EL-A3H1-01A TCGA-H2-A3RI-01A TCGA-EL-A3GR-01A
## ENSG00000187634 856 2180 825
## ENSG00000188976 4703 6086 3074
## ENSG00000187961 248 501 294
## ENSG00000187583 53 746 567
## ENSG00000187642 12 236 143
## TCGA-DJ-A3UN-01A TCGA-DO-A1JZ-01A TCGA-EL-A3GU-01A
## ENSG00000187634 1358 2147 729
## ENSG00000188976 5933 5130 3542
## ENSG00000187961 408 314 282
## ENSG00000187583 767 331 536
## ENSG00000187642 325 107 163
## TCGA-DJ-A1QM-01A TCGA-EM-A1CT-11A TCGA-EL-A3CZ-01A
## ENSG00000187634 1427 823 330
## ENSG00000188976 5293 4467 2769
## ENSG00000187961 288 145 133
## ENSG00000187583 428 24 105
## ENSG00000187642 146 17 30
## TCGA-EL-A3ZS-11A TCGA-J8-A3YE-01A TCGA-ET-A2N4-01A
## ENSG00000187634 712 833 2508
## ENSG00000188976 1946 4416 6531
## ENSG00000187961 195 613 394
## ENSG00000187583 40 526 967
## ENSG00000187642 35 156 416
## TCGA-EM-A4FO-01A TCGA-EL-A3ZO-11A TCGA-ET-A4KN-01A
## ENSG00000187634 1389 847 4158
## ENSG00000188976 4618 3365 4921
## ENSG00000187961 519 202 132
## ENSG00000187583 545 49 396
## ENSG00000187642 277 26 569
## TCGA-EL-A3T1-11A TCGA-ET-A2N5-11B TCGA-BJ-A2N8-11A
## ENSG00000187634 756 777 947
## ENSG00000188976 3407 5134 6688
## ENSG00000187961 169 197 190
## ENSG00000187583 69 7 87
## ENSG00000187642 33 9 29
## TCGA-FY-A3I4-01A TCGA-EL-A3ZQ-11A TCGA-DJ-A2Q2-01A
## ENSG00000187634 2044 902 885
## ENSG00000188976 6328 4849 5436
## ENSG00000187961 372 413 242
## ENSG00000187583 1059 55 153
## ENSG00000187642 355 25 51
## TCGA-EL-A3ZP-11A TCGA-EL-A3GQ-01A TCGA-EL-A3TA-11A
## ENSG00000187634 536 2009 838
## ENSG00000188976 3141 4217 5328
## ENSG00000187961 241 309 226
## ENSG00000187583 59 373 23
## ENSG00000187642 20 129 16
## TCGA-CE-A481-01A TCGA-E8-A438-01A TCGA-EL-A3H7-11A
## ENSG00000187634 828 3638 779
## ENSG00000188976 4236 5774 4484
## ENSG00000187961 430 733 188
## ENSG00000187583 323 1190 73
## ENSG00000187642 133 673 31
## TCGA-EL-A3GZ-11A TCGA-E8-A413-01A TCGA-BJ-A28X-11A
## ENSG00000187634 518 1230 834
## ENSG00000188976 3293 4564 4379
## ENSG00000187961 100 431 527
## ENSG00000187583 12 611 116
## ENSG00000187642 6 188 27
## TCGA-EM-A2CS-01A TCGA-ET-A3BX-01A TCGA-DJ-A13V-01A
## ENSG00000187634 1512 2770 1882
## ENSG00000188976 5074 5293 4500
## ENSG00000187961 249 310 229
## ENSG00000187583 279 1276 688
## ENSG00000187642 92 457 250
## TCGA-KS-A41I-11A TCGA-E8-A44K-01A TCGA-ET-A3DW-11A
## ENSG00000187634 639 601 672
## ENSG00000188976 3641 3296 3835
## ENSG00000187961 353 359 125
## ENSG00000187583 110 442 52
## ENSG00000187642 44 155 19
## TCGA-EL-A3ZL-11A TCGA-EL-A4JV-01A TCGA-FE-A231-01A
## ENSG00000187634 665 711 586
## ENSG00000188976 4600 3679 5244
## ENSG00000187961 207 252 284
## ENSG00000187583 47 136 713
## ENSG00000187642 29 41 209
## TCGA-BJ-A28R-11A TCGA-EM-A1CS-11A TCGA-FE-A3PA-01A
## ENSG00000187634 623 878 1346
## ENSG00000188976 3382 4261 4057
## ENSG00000187961 169 148 763
## ENSG00000187583 48 22 716
## ENSG00000187642 13 14 633
## TCGA-EM-A2CQ-01A TCGA-EL-A3TB-11A TCGA-BJ-A28W-11A
## ENSG00000187634 741 987 624
## ENSG00000188976 5195 4640 2636
## ENSG00000187961 158 127 128
## ENSG00000187583 177 9 129
## ENSG00000187642 49 7 76
## TCGA-KS-A41J-11A TCGA-EL-A3T2-11A TCGA-KS-A41L-11A
## ENSG00000187634 590 572 736
## ENSG00000188976 4202 4169 4679
## ENSG00000187961 306 206 375
## ENSG00000187583 53 18 28
## ENSG00000187642 13 22 17
## TCGA-EL-A3T0-11A TCGA-DJ-A3UP-01A TCGA-DJ-A4V0-01A
## ENSG00000187634 616 772 962
## ENSG00000188976 3781 4261 3834
## ENSG00000187961 193 315 404
## ENSG00000187583 23 350 258
## ENSG00000187642 12 122 79
## TCGA-EL-A3ZK-11A TCGA-BJ-A3PT-01A TCGA-EM-A3OA-01A
## ENSG00000187634 980 2430 1895
## ENSG00000188976 3483 4515 7998
## ENSG00000187961 328 229 577
## ENSG00000187583 118 276 156
## ENSG00000187642 47 156 47
## TCGA-FY-A3TY-11A TCGA-EL-A3MY-11A TCGA-FY-A3R9-01A
## ENSG00000187634 507 1034 937
## ENSG00000188976 3192 3762 3901
## ENSG00000187961 244 189 182
## ENSG00000187583 15 45 78
## ENSG00000187642 11 14 30
## TCGA-BJ-A3PR-11A TCGA-EL-A3H2-11A TCGA-ET-A2MY-01A
## ENSG00000187634 2733 673 1765
## ENSG00000188976 5395 4086 5239
## ENSG00000187961 291 123 206
## ENSG00000187583 62 20 509
## ENSG00000187642 71 4 209
## TCGA-ET-A3DP-11A TCGA-L6-A4ET-01A TCGA-BJ-A3PU-11A
## ENSG00000187634 643 3035 1017
## ENSG00000188976 4576 6615 5129
## ENSG00000187961 58 651 240
## ENSG00000187583 13 1070 38
## ENSG00000187642 5 366 18
## TCGA-DO-A2HM-01B TCGA-E8-A2JQ-11A TCGA-EL-A3ZT-11A
## ENSG00000187634 592 644 424
## ENSG00000188976 3982 3262 3511
## ENSG00000187961 480 233 259
## ENSG00000187583 413 73 23
## ENSG00000187642 132 28 8
## TCGA-EL-A3MW-11A TCGA-BJ-A28T-01A TCGA-DO-A1JZ-11A
## ENSG00000187634 676 293 469
## ENSG00000188976 3402 1600 3863
## ENSG00000187961 109 87 95
## ENSG00000187583 21 71 15
## ENSG00000187642 4 25 3
## TCGA-EL-A3N3-11A TCGA-BJ-A290-11A TCGA-E8-A242-01A
## ENSG00000187634 723 812 2043
## ENSG00000188976 4296 4258 5579
## ENSG00000187961 201 149 303
## ENSG00000187583 40 25 1434
## ENSG00000187642 18 6 497
## TCGA-ET-A39L-01A TCGA-EL-A3MY-01A TCGA-EL-A3T8-11A
## ENSG00000187634 1026 1248 856
## ENSG00000188976 5111 4227 3813
## ENSG00000187961 188 336 283
## ENSG00000187583 264 991 69
## ENSG00000187642 104 337 39
## TCGA-EM-A1CV-11A TCGA-EM-A1CW-11A TCGA-EL-A3N2-11A
## ENSG00000187634 702 169 517
## ENSG00000188976 4533 1821 3685
## ENSG00000187961 148 43 155
## ENSG00000187583 28 7 40
## ENSG00000187642 11 0 18
## TCGA-EL-A3T3-11A TCGA-BJ-A45G-01A TCGA-EL-A3CY-01A
## ENSG00000187634 1016 842 987
## ENSG00000188976 4499 4031 4132
## ENSG00000187961 257 256 383
## ENSG00000187583 22 311 301
## ENSG00000187642 14 121 111
## TCGA-EL-A3ZR-11A TCGA-EL-A3T7-11A TCGA-CE-A3ME-01A
## ENSG00000187634 625 600 1428
## ENSG00000188976 3827 3951 5277
## ENSG00000187961 304 272 320
## ENSG00000187583 109 32 378
## ENSG00000187642 250 26 154
## TCGA-EL-A3T6-11A TCGA-KS-A4ID-01A TCGA-ET-A3BP-01A
## ENSG00000187634 1130 867 1631
## ENSG00000188976 4429 3260 4683
## ENSG00000187961 307 241 275
## ENSG00000187583 96 444 466
## ENSG00000187642 27 197 232
## TCGA-EL-A3ZG-11A TCGA-ET-A2MX-11C TCGA-BJ-A18Z-01A
## ENSG00000187634 668 765 718
## ENSG00000188976 2450 3985 2503
## ENSG00000187961 122 204 286
## ENSG00000187583 8 32 549
## ENSG00000187642 8 13 245
## TCGA-GE-A2C6-11A TCGA-BJ-A2N9-11A TCGA-ET-A39N-01A
## ENSG00000187634 368 867 1338
## ENSG00000188976 2513 4565 5517
## ENSG00000187961 125 248 163
## ENSG00000187583 34 75 94
## ENSG00000187642 22 26 55
## TCGA-DJ-A2QC-01A
## ENSG00000187634 3174
## ENSG00000188976 6177
## ENSG00000187961 312
## ENSG00000187583 610
## ENSG00000187642 346
## 16743 more rows ...
##
## $samples
## group lib.size norm.factors sample condition
## TCGA-DJ-A2Q1-01A case 68063260 0.7881489 TCGA-DJ-A2Q1-01A case
## TCGA-H2-A2K9-11A control 79992173 1.0866990 TCGA-H2-A2K9-11A control
## TCGA-H2-A3RI-11A control 76205032 1.0931174 TCGA-H2-A3RI-11A control
## TCGA-DJ-A4V4-01A case 57746743 0.9422199 TCGA-DJ-A4V4-01A case
## TCGA-EL-A3T9-01A case 69959466 0.9466800 TCGA-EL-A3T9-01A case
## 95 more rows ...
##
## $genes
## ensembl_gene_id external_gene_name length
## ENSG00000187634 ENSG00000187634 SAMD11 20652
## ENSG00000188976 ENSG00000188976 NOC2L 15106
## ENSG00000187961 ENSG00000187961 KLHL17 5135
## ENSG00000187583 ENSG00000187583 PLEKHN1 9383
## ENSG00000187642 ENSG00000187642 PERM1 6919
## 16743 more rows ...
We can then create a CPM (counts per million) table of normalized expression values. We use the cpm function and convert the result into a data frame with as.data.frame(). We also create a second CPM table on a log10 scale, computed as log10(CPM + 1).
# Counts per million, rounded to two decimals and stored as a data frame.
# cpm() scales the library sizes by the normalization factors held in edge_n.
cpm_table <- edge_n %>%
  cpm() %>%
  round(digits = 2) %>%
  as.data.frame()
head(cpm_table)
## TCGA-DJ-A2Q1-01A TCGA-H2-A2K9-11A TCGA-H2-A3RI-11A
## ENSG00000187634 25.82 6.86 8.74
## ENSG00000188976 75.98 47.33 47.87
## ENSG00000187961 5.31 2.60 2.92
## ENSG00000187583 12.12 0.62 0.44
## ENSG00000187642 10.63 0.25 0.14
## ENSG00000188290 2.16 0.63 0.95
## TCGA-DJ-A4V4-01A TCGA-EL-A3T9-01A TCGA-BJ-A0ZB-01A
## ENSG00000187634 24.55 17.71 14.23
## ENSG00000188976 75.61 86.25 71.53
## ENSG00000187961 6.21 9.41 3.19
## ENSG00000187583 10.24 14.06 6.50
## ENSG00000187642 5.48 4.89 2.41
## ENSG00000188290 7.41 3.62 3.15
## TCGA-EL-A3H1-01A TCGA-H2-A3RI-01A TCGA-EL-A3GR-01A
## ENSG00000187634 11.13 25.51 14.57
## ENSG00000188976 61.14 71.23 54.27
## ENSG00000187961 3.22 5.86 5.19
## ENSG00000187583 0.69 8.73 10.01
## ENSG00000187642 0.16 2.76 2.52
## ENSG00000188290 3.55 2.97 7.38
## TCGA-DJ-A3UN-01A TCGA-DO-A1JZ-01A TCGA-EL-A3GU-01A
## ENSG00000187634 19.89 34.35 12.24
## ENSG00000188976 86.91 82.07 59.46
## ENSG00000187961 5.98 5.02 4.73
## ENSG00000187583 11.24 5.30 9.00
## ENSG00000187642 4.76 1.71 2.74
## ENSG00000188290 3.68 3.26 13.11
## TCGA-DJ-A1QM-01A TCGA-EM-A1CT-11A TCGA-EL-A3CZ-01A
## ENSG00000187634 16.45 10.71 5.87
## ENSG00000188976 61.02 58.15 49.23
## ENSG00000187961 3.32 1.89 2.36
## ENSG00000187583 4.93 0.31 1.87
## ENSG00000187642 1.68 0.22 0.53
## ENSG00000188290 1.78 1.45 1.01
## TCGA-EL-A3ZS-11A TCGA-J8-A3YE-01A TCGA-ET-A2N4-01A
## ENSG00000187634 29.30 11.81 39.34
## ENSG00000188976 80.09 62.63 102.45
## ENSG00000187961 8.03 8.69 6.18
## ENSG00000187583 1.65 7.46 15.17
## ENSG00000187642 1.44 2.21 6.53
## ENSG00000188290 16.91 9.73 7.81
## TCGA-EM-A4FO-01A TCGA-EL-A3ZO-11A TCGA-ET-A4KN-01A
## ENSG00000187634 20.25 12.50 201.92
## ENSG00000188976 67.33 49.68 238.97
## ENSG00000187961 7.57 2.98 6.41
## ENSG00000187583 7.95 0.72 19.23
## ENSG00000187642 4.04 0.38 27.63
## ENSG00000188290 8.75 10.69 18.79
## TCGA-EL-A3T1-11A TCGA-ET-A2N5-11B TCGA-BJ-A2N8-11A
## ENSG00000187634 11.99 9.82 8.84
## ENSG00000188976 54.06 64.87 62.45
## ENSG00000187961 2.68 2.49 1.77
## ENSG00000187583 1.09 0.09 0.81
## ENSG00000187642 0.52 0.11 0.27
## ENSG00000188290 3.13 0.86 0.57
## TCGA-FY-A3I4-01A TCGA-EL-A3ZQ-11A TCGA-DJ-A2Q2-01A
## ENSG00000187634 21.71 10.34 11.54
## ENSG00000188976 67.20 55.58 70.86
## ENSG00000187961 3.95 4.73 3.15
## ENSG00000187583 11.25 0.63 1.99
## ENSG00000187642 3.77 0.29 0.66
## ENSG00000188290 8.85 1.93 1.94
## TCGA-EL-A3ZP-11A TCGA-EL-A3GQ-01A TCGA-EL-A3TA-11A
## ENSG00000187634 8.31 29.53 9.99
## ENSG00000188976 48.67 61.98 63.50
## ENSG00000187961 3.73 4.54 2.69
## ENSG00000187583 0.91 5.48 0.27
## ENSG00000187642 0.31 1.90 0.19
## ENSG00000188290 2.28 1.44 0.68
## TCGA-CE-A481-01A TCGA-E8-A438-01A TCGA-EL-A3H7-11A
## ENSG00000187634 11.63 62.13 11.05
## ENSG00000188976 59.52 98.61 63.62
## ENSG00000187961 6.04 12.52 2.67
## ENSG00000187583 4.54 20.32 1.04
## ENSG00000187642 1.87 11.49 0.44
## ENSG00000188290 4.75 24.20 2.82
## TCGA-EL-A3GZ-11A TCGA-E8-A413-01A TCGA-BJ-A28X-11A
## ENSG00000187634 8.49 17.65 10.40
## ENSG00000188976 53.95 65.48 54.59
## ENSG00000187961 1.64 6.18 6.57
## ENSG00000187583 0.20 8.77 1.45
## ENSG00000187642 0.10 2.70 0.34
## ENSG00000188290 1.05 16.08 0.66
## TCGA-EM-A2CS-01A TCGA-ET-A3BX-01A TCGA-DJ-A13V-01A
## ENSG00000187634 21.93 42.67 35.63
## ENSG00000188976 73.61 81.54 85.20
## ENSG00000187961 3.61 4.78 4.34
## ENSG00000187583 4.05 19.66 13.03
## ENSG00000187642 1.33 7.04 4.73
## ENSG00000188290 3.21 6.67 5.28
## TCGA-KS-A41I-11A TCGA-E8-A44K-01A TCGA-ET-A3DW-11A
## ENSG00000187634 10.20 11.74 9.91
## ENSG00000188976 58.11 64.39 56.55
## ENSG00000187961 5.63 7.01 1.84
## ENSG00000187583 1.76 8.63 0.77
## ENSG00000187642 0.70 3.03 0.28
## ENSG00000188290 4.39 6.64 1.87
## TCGA-EL-A3ZL-11A TCGA-EL-A4JV-01A TCGA-FE-A231-01A
## ENSG00000187634 8.49 11.85 7.20
## ENSG00000188976 58.72 61.32 64.47
## ENSG00000187961 2.64 4.20 3.49
## ENSG00000187583 0.60 2.27 8.77
## ENSG00000187642 0.37 0.68 2.57
## ENSG00000188290 0.71 6.22 1.65
## TCGA-BJ-A28R-11A TCGA-EM-A1CS-11A TCGA-FE-A3PA-01A
## ENSG00000187634 9.33 12.88 19.75
## ENSG00000188976 50.65 62.52 59.54
## ENSG00000187961 2.53 2.17 11.20
## ENSG00000187583 0.72 0.32 10.51
## ENSG00000187642 0.19 0.21 9.29
## ENSG00000188290 1.57 1.28 22.07
## TCGA-EM-A2CQ-01A TCGA-EL-A3TB-11A TCGA-BJ-A28W-11A
## ENSG00000187634 9.29 12.26 19.19
## ENSG00000188976 65.15 57.64 81.07
## ENSG00000187961 1.98 1.58 3.94
## ENSG00000187583 2.22 0.11 3.97
## ENSG00000187642 0.61 0.09 2.34
## ENSG00000188290 3.21 0.58 6.89
## TCGA-KS-A41J-11A TCGA-EL-A3T2-11A TCGA-KS-A41L-11A
## ENSG00000187634 6.89 7.78 8.96
## ENSG00000188976 49.05 56.74 56.93
## ENSG00000187961 3.57 2.80 4.56
## ENSG00000187583 0.62 0.24 0.34
## ENSG00000187642 0.15 0.30 0.21
## ENSG00000188290 2.70 0.82 1.97
## TCGA-EL-A3T0-11A TCGA-DJ-A3UP-01A TCGA-DJ-A4V0-01A
## ENSG00000187634 9.04 10.24 13.97
## ENSG00000188976 55.46 56.52 55.69
## ENSG00000187961 2.83 4.18 5.87
## ENSG00000187583 0.34 4.64 3.75
## ENSG00000187642 0.18 1.62 1.15
## ENSG00000188290 0.97 1.01 2.40
## TCGA-EL-A3ZK-11A TCGA-BJ-A3PT-01A TCGA-EM-A3OA-01A
## ENSG00000187634 15.34 44.72 15.49
## ENSG00000188976 54.52 83.09 65.36
## ENSG00000187961 5.13 4.21 4.72
## ENSG00000187583 1.85 5.08 1.27
## ENSG00000187642 0.74 2.87 0.38
## ENSG00000188290 1.28 3.68 5.58
## TCGA-FY-A3TY-11A TCGA-EL-A3MY-11A TCGA-FY-A3R9-01A
## ENSG00000187634 8.31 13.88 14.08
## ENSG00000188976 52.32 50.49 58.63
## ENSG00000187961 4.00 2.54 2.74
## ENSG00000187583 0.25 0.60 1.17
## ENSG00000187642 0.18 0.19 0.45
## ENSG00000188290 1.16 2.81 1.46
## TCGA-BJ-A3PR-11A TCGA-EL-A3H2-11A TCGA-ET-A2MY-01A
## ENSG00000187634 51.92 8.87 30.73
## ENSG00000188976 102.50 53.86 91.21
## ENSG00000187961 5.53 1.62 3.59
## ENSG00000187583 1.18 0.26 8.86
## ENSG00000187642 1.35 0.05 3.64
## ENSG00000188290 4.52 0.45 3.19
## TCGA-ET-A3DP-11A TCGA-L6-A4ET-01A TCGA-BJ-A3PU-11A
## ENSG00000187634 8.28 49.16 11.42
## ENSG00000188976 58.93 107.15 57.60
## ENSG00000187961 0.75 10.54 2.70
## ENSG00000187583 0.17 17.33 0.43
## ENSG00000187642 0.06 5.93 0.20
## ENSG00000188290 1.20 9.61 1.12
## TCGA-DO-A2HM-01B TCGA-E8-A2JQ-11A TCGA-EL-A3ZT-11A
## ENSG00000187634 8.51 10.46 6.36
## ENSG00000188976 57.23 52.97 52.68
## ENSG00000187961 6.90 3.78 3.89
## ENSG00000187583 5.94 1.19 0.35
## ENSG00000187642 1.90 0.45 0.12
## ENSG00000188290 1.84 1.04 0.69
## TCGA-EL-A3MW-11A TCGA-BJ-A28T-01A TCGA-DO-A1JZ-11A
## ENSG00000187634 9.89 9.05 8.39
## ENSG00000188976 49.79 49.42 69.10
## ENSG00000187961 1.60 2.69 1.70
## ENSG00000187583 0.31 2.19 0.27
## ENSG00000187642 0.06 0.77 0.05
## ENSG00000188290 1.45 1.02 0.64
## TCGA-EL-A3N3-11A TCGA-BJ-A290-11A TCGA-E8-A242-01A
## ENSG00000187634 8.76 8.81 24.61
## ENSG00000188976 52.05 46.19 67.21
## ENSG00000187961 2.44 1.62 3.65
## ENSG00000187583 0.48 0.27 17.28
## ENSG00000187642 0.22 0.07 5.99
## ENSG00000188290 0.96 0.67 4.48
## TCGA-ET-A39L-01A TCGA-EL-A3MY-01A TCGA-EL-A3T8-11A
## ENSG00000187634 13.56 19.41 11.77
## ENSG00000188976 67.56 65.75 52.41
## ENSG00000187961 2.49 5.23 3.89
## ENSG00000187583 3.49 15.41 0.95
## ENSG00000187642 1.37 5.24 0.54
## ENSG00000188290 2.60 10.05 2.74
## TCGA-EM-A1CV-11A TCGA-EM-A1CW-11A TCGA-EL-A3N2-11A
## ENSG00000187634 11.22 3.73 6.65
## ENSG00000188976 72.45 40.22 47.38
## ENSG00000187961 2.37 0.95 1.99
## ENSG00000187583 0.45 0.15 0.51
## ENSG00000187642 0.18 0.00 0.23
## ENSG00000188290 0.45 0.38 0.91
## TCGA-EL-A3T3-11A TCGA-BJ-A45G-01A TCGA-EL-A3CY-01A
## ENSG00000187634 12.64 12.29 13.47
## ENSG00000188976 55.97 58.84 56.38
## ENSG00000187961 3.20 3.74 5.23
## ENSG00000187583 0.27 4.54 4.11
## ENSG00000187642 0.17 1.77 1.51
## ENSG00000188290 1.32 3.98 3.34
## TCGA-EL-A3ZR-11A TCGA-EL-A3T7-11A TCGA-CE-A3ME-01A
## ENSG00000187634 8.19 8.02 23.22
## ENSG00000188976 50.14 52.78 85.82
## ENSG00000187961 3.98 3.63 5.20
## ENSG00000187583 1.43 0.43 6.15
## ENSG00000187642 3.28 0.35 2.50
## ENSG00000188290 3.12 1.08 4.86
## TCGA-EL-A3T6-11A TCGA-KS-A4ID-01A TCGA-ET-A3BP-01A
## ENSG00000187634 16.15 18.25 23.52
## ENSG00000188976 63.30 68.62 67.54
## ENSG00000187961 4.39 5.07 3.97
## ENSG00000187583 1.37 9.35 6.72
## ENSG00000187642 0.39 4.15 3.35
## ENSG00000188290 8.25 8.86 4.34
## TCGA-EL-A3ZG-11A TCGA-ET-A2MX-11C TCGA-BJ-A18Z-01A
## ENSG00000187634 17.86 11.05 17.47
## ENSG00000188976 65.49 57.54 60.89
## ENSG00000187961 3.26 2.95 6.96
## ENSG00000187583 0.21 0.46 13.35
## ENSG00000187642 0.21 0.19 5.96
## ENSG00000188290 6.33 0.97 4.99
## TCGA-GE-A2C6-11A TCGA-BJ-A2N9-11A TCGA-ET-A39N-01A
## ENSG00000187634 7.45 12.69 21.60
## ENSG00000188976 50.85 66.84 89.06
## ENSG00000187961 2.53 3.63 2.63
## ENSG00000187583 0.69 1.10 1.52
## ENSG00000187642 0.45 0.38 0.89
## ENSG00000188290 1.03 1.05 4.04
## TCGA-DJ-A2QC-01A
## ENSG00000187634 59.59
## ENSG00000188976 115.98
## ENSG00000187961 5.86
## ENSG00000187583 11.45
## ENSG00000187642 6.50
## ENSG00000188290 12.30
###########################
# log10(CPM + 1), rounded to two decimals and stored as a data frame; the +1
# keeps genes with zero CPM at zero instead of -Inf.
cpm_table_log <- (cpm(edge_n) + 1) %>%
  log10() %>%
  round(digits = 2) %>%
  as.data.frame()
head(cpm_table_log)
## TCGA-DJ-A2Q1-01A TCGA-H2-A2K9-11A TCGA-H2-A3RI-11A
## ENSG00000187634 1.43 0.90 0.99
## ENSG00000188976 1.89 1.68 1.69
## ENSG00000187961 0.80 0.56 0.59
## ENSG00000187583 1.12 0.21 0.16
## ENSG00000187642 1.07 0.10 0.06
## ENSG00000188290 0.50 0.21 0.29
## TCGA-DJ-A4V4-01A TCGA-EL-A3T9-01A TCGA-BJ-A0ZB-01A
## ENSG00000187634 1.41 1.27 1.18
## ENSG00000188976 1.88 1.94 1.86
## ENSG00000187961 0.86 1.02 0.62
## ENSG00000187583 1.05 1.18 0.87
## ENSG00000187642 0.81 0.77 0.53
## ENSG00000188290 0.92 0.66 0.62
## TCGA-EL-A3H1-01A TCGA-H2-A3RI-01A TCGA-EL-A3GR-01A
## ENSG00000187634 1.08 1.42 1.19
## ENSG00000188976 1.79 1.86 1.74
## ENSG00000187961 0.63 0.84 0.79
## ENSG00000187583 0.23 0.99 1.04
## ENSG00000187642 0.06 0.58 0.55
## ENSG00000188290 0.66 0.60 0.92
## TCGA-DJ-A3UN-01A TCGA-DO-A1JZ-01A TCGA-EL-A3GU-01A
## ENSG00000187634 1.32 1.55 1.12
## ENSG00000188976 1.94 1.92 1.78
## ENSG00000187961 0.84 0.78 0.76
## ENSG00000187583 1.09 0.80 1.00
## ENSG00000187642 0.76 0.43 0.57
## ENSG00000188290 0.67 0.63 1.15
## TCGA-DJ-A1QM-01A TCGA-EM-A1CT-11A TCGA-EL-A3CZ-01A
## ENSG00000187634 1.24 1.07 0.84
## ENSG00000188976 1.79 1.77 1.70
## ENSG00000187961 0.64 0.46 0.53
## ENSG00000187583 0.77 0.12 0.46
## ENSG00000187642 0.43 0.09 0.19
## ENSG00000188290 0.44 0.39 0.30
## TCGA-EL-A3ZS-11A TCGA-J8-A3YE-01A TCGA-ET-A2N4-01A
## ENSG00000187634 1.48 1.11 1.61
## ENSG00000188976 1.91 1.80 2.01
## ENSG00000187961 0.96 0.99 0.86
## ENSG00000187583 0.42 0.93 1.21
## ENSG00000187642 0.39 0.51 0.88
## ENSG00000188290 1.25 1.03 0.95
## TCGA-EM-A4FO-01A TCGA-EL-A3ZO-11A TCGA-ET-A4KN-01A
## ENSG00000187634 1.33 1.13 2.31
## ENSG00000188976 1.83 1.70 2.38
## ENSG00000187961 0.93 0.60 0.87
## ENSG00000187583 0.95 0.24 1.31
## ENSG00000187642 0.70 0.14 1.46
## ENSG00000188290 0.99 1.07 1.30
## TCGA-EL-A3T1-11A TCGA-ET-A2N5-11B TCGA-BJ-A2N8-11A
## ENSG00000187634 1.11 1.03 0.99
## ENSG00000188976 1.74 1.82 1.80
## ENSG00000187961 0.57 0.54 0.44
## ENSG00000187583 0.32 0.04 0.26
## ENSG00000187642 0.18 0.05 0.10
## ENSG00000188290 0.62 0.27 0.20
## TCGA-FY-A3I4-01A TCGA-EL-A3ZQ-11A TCGA-DJ-A2Q2-01A
## ENSG00000187634 1.36 1.05 1.10
## ENSG00000188976 1.83 1.75 1.86
## ENSG00000187961 0.69 0.76 0.62
## ENSG00000187583 1.09 0.21 0.48
## ENSG00000187642 0.68 0.11 0.22
## ENSG00000188290 0.99 0.47 0.47
## TCGA-EL-A3ZP-11A TCGA-EL-A3GQ-01A TCGA-EL-A3TA-11A
## ENSG00000187634 0.97 1.48 1.04
## ENSG00000188976 1.70 1.80 1.81
## ENSG00000187961 0.68 0.74 0.57
## ENSG00000187583 0.28 0.81 0.11
## ENSG00000187642 0.12 0.46 0.08
## ENSG00000188290 0.52 0.39 0.23
## TCGA-CE-A481-01A TCGA-E8-A438-01A TCGA-EL-A3H7-11A
## ENSG00000187634 1.10 1.80 1.08
## ENSG00000188976 1.78 2.00 1.81
## ENSG00000187961 0.85 1.13 0.56
## ENSG00000187583 0.74 1.33 0.31
## ENSG00000187642 0.46 1.10 0.16
## ENSG00000188290 0.76 1.40 0.58
## TCGA-EL-A3GZ-11A TCGA-E8-A413-01A TCGA-BJ-A28X-11A
## ENSG00000187634 0.98 1.27 1.06
## ENSG00000188976 1.74 1.82 1.74
## ENSG00000187961 0.42 0.86 0.88
## ENSG00000187583 0.08 0.99 0.39
## ENSG00000187642 0.04 0.57 0.13
## ENSG00000188290 0.31 1.23 0.22
## TCGA-EM-A2CS-01A TCGA-ET-A3BX-01A TCGA-DJ-A13V-01A
## ENSG00000187634 1.36 1.64 1.56
## ENSG00000188976 1.87 1.92 1.94
## ENSG00000187961 0.66 0.76 0.73
## ENSG00000187583 0.70 1.32 1.15
## ENSG00000187642 0.37 0.91 0.76
## ENSG00000188290 0.62 0.88 0.80
## TCGA-KS-A41I-11A TCGA-E8-A44K-01A TCGA-ET-A3DW-11A
## ENSG00000187634 1.05 1.11 1.04
## ENSG00000188976 1.77 1.82 1.76
## ENSG00000187961 0.82 0.90 0.45
## ENSG00000187583 0.44 0.98 0.25
## ENSG00000187642 0.23 0.61 0.11
## ENSG00000188290 0.73 0.88 0.46
## TCGA-EL-A3ZL-11A TCGA-EL-A4JV-01A TCGA-FE-A231-01A
## ENSG00000187634 0.98 1.11 0.91
## ENSG00000188976 1.78 1.79 1.82
## ENSG00000187961 0.56 0.72 0.65
## ENSG00000187583 0.20 0.51 0.99
## ENSG00000187642 0.14 0.23 0.55
## ENSG00000188290 0.23 0.86 0.42
## TCGA-BJ-A28R-11A TCGA-EM-A1CS-11A TCGA-FE-A3PA-01A
## ENSG00000187634 1.01 1.14 1.32
## ENSG00000188976 1.71 1.80 1.78
## ENSG00000187961 0.55 0.50 1.09
## ENSG00000187583 0.24 0.12 1.06
## ENSG00000187642 0.08 0.08 1.01
## ENSG00000188290 0.41 0.36 1.36
## TCGA-EM-A2CQ-01A TCGA-EL-A3TB-11A TCGA-BJ-A28W-11A
## ENSG00000187634 1.01 1.12 1.31
## ENSG00000188976 1.82 1.77 1.91
## ENSG00000187961 0.47 0.41 0.69
## ENSG00000187583 0.51 0.05 0.70
## ENSG00000187642 0.21 0.04 0.52
## ENSG00000188290 0.62 0.20 0.90
## TCGA-KS-A41J-11A TCGA-EL-A3T2-11A TCGA-KS-A41L-11A
## ENSG00000187634 0.90 0.94 1.00
## ENSG00000188976 1.70 1.76 1.76
## ENSG00000187961 0.66 0.58 0.75
## ENSG00000187583 0.21 0.10 0.13
## ENSG00000187642 0.06 0.11 0.08
## ENSG00000188290 0.57 0.26 0.47
## TCGA-EL-A3T0-11A TCGA-DJ-A3UP-01A TCGA-DJ-A4V0-01A
## ENSG00000187634 1.00 1.05 1.18
## ENSG00000188976 1.75 1.76 1.75
## ENSG00000187961 0.58 0.71 0.84
## ENSG00000187583 0.13 0.75 0.68
## ENSG00000187642 0.07 0.42 0.33
## ENSG00000188290 0.29 0.30 0.53
## TCGA-EL-A3ZK-11A TCGA-BJ-A3PT-01A TCGA-EM-A3OA-01A
## ENSG00000187634 1.21 1.66 1.22
## ENSG00000188976 1.74 1.92 1.82
## ENSG00000187961 0.79 0.72 0.76
## ENSG00000187583 0.45 0.78 0.36
## ENSG00000187642 0.24 0.59 0.14
## ENSG00000188290 0.36 0.67 0.82
## TCGA-FY-A3TY-11A TCGA-EL-A3MY-11A TCGA-FY-A3R9-01A
## ENSG00000187634 0.97 1.17 1.18
## ENSG00000188976 1.73 1.71 1.78
## ENSG00000187961 0.70 0.55 0.57
## ENSG00000187583 0.10 0.21 0.34
## ENSG00000187642 0.07 0.07 0.16
## ENSG00000188290 0.34 0.58 0.39
## TCGA-BJ-A3PR-11A TCGA-EL-A3H2-11A TCGA-ET-A2MY-01A
## ENSG00000187634 1.72 0.99 1.50
## ENSG00000188976 2.01 1.74 1.96
## ENSG00000187961 0.81 0.42 0.66
## ENSG00000187583 0.34 0.10 0.99
## ENSG00000187642 0.37 0.02 0.67
## ENSG00000188290 0.74 0.16 0.62
## TCGA-ET-A3DP-11A TCGA-L6-A4ET-01A TCGA-BJ-A3PU-11A
## ENSG00000187634 0.97 1.70 1.09
## ENSG00000188976 1.78 2.03 1.77
## ENSG00000187961 0.24 1.06 0.57
## ENSG00000187583 0.07 1.26 0.15
## ENSG00000187642 0.03 0.84 0.08
## ENSG00000188290 0.34 1.03 0.33
## TCGA-DO-A2HM-01B TCGA-E8-A2JQ-11A TCGA-EL-A3ZT-11A
## ENSG00000187634 0.98 1.06 0.87
## ENSG00000188976 1.77 1.73 1.73
## ENSG00000187961 0.90 0.68 0.69
## ENSG00000187583 0.84 0.34 0.13
## ENSG00000187642 0.46 0.16 0.05
## ENSG00000188290 0.45 0.31 0.23
## TCGA-EL-A3MW-11A TCGA-BJ-A28T-01A TCGA-DO-A1JZ-11A
## ENSG00000187634 1.04 1.00 0.97
## ENSG00000188976 1.71 1.70 1.85
## ENSG00000187961 0.41 0.57 0.43
## ENSG00000187583 0.12 0.50 0.10
## ENSG00000187642 0.02 0.25 0.02
## ENSG00000188290 0.39 0.31 0.22
## TCGA-EL-A3N3-11A TCGA-BJ-A290-11A TCGA-E8-A242-01A
## ENSG00000187634 0.99 0.99 1.41
## ENSG00000188976 1.72 1.67 1.83
## ENSG00000187961 0.54 0.42 0.67
## ENSG00000187583 0.17 0.10 1.26
## ENSG00000187642 0.09 0.03 0.84
## ENSG00000188290 0.29 0.22 0.74
## TCGA-ET-A39L-01A TCGA-EL-A3MY-01A TCGA-EL-A3T8-11A
## ENSG00000187634 1.16 1.31 1.11
## ENSG00000188976 1.84 1.82 1.73
## ENSG00000187961 0.54 0.79 0.69
## ENSG00000187583 0.65 1.22 0.29
## ENSG00000187642 0.38 0.80 0.19
## ENSG00000188290 0.56 1.04 0.57
## TCGA-EM-A1CV-11A TCGA-EM-A1CW-11A TCGA-EL-A3N2-11A
## ENSG00000187634 1.09 0.68 0.88
## ENSG00000188976 1.87 1.62 1.68
## ENSG00000187961 0.53 0.29 0.48
## ENSG00000187583 0.16 0.06 0.18
## ENSG00000187642 0.07 0.00 0.09
## ENSG00000188290 0.16 0.14 0.28
## TCGA-EL-A3T3-11A TCGA-BJ-A45G-01A TCGA-EL-A3CY-01A
## ENSG00000187634 1.13 1.12 1.16
## ENSG00000188976 1.76 1.78 1.76
## ENSG00000187961 0.62 0.68 0.79
## ENSG00000187583 0.11 0.74 0.71
## ENSG00000187642 0.07 0.44 0.40
## ENSG00000188290 0.37 0.70 0.64
## TCGA-EL-A3ZR-11A TCGA-EL-A3T7-11A TCGA-CE-A3ME-01A
## ENSG00000187634 0.96 0.96 1.38
## ENSG00000188976 1.71 1.73 1.94
## ENSG00000187961 0.70 0.67 0.79
## ENSG00000187583 0.39 0.15 0.85
## ENSG00000187642 0.63 0.13 0.54
## ENSG00000188290 0.61 0.32 0.77
## TCGA-EL-A3T6-11A TCGA-KS-A4ID-01A TCGA-ET-A3BP-01A
## ENSG00000187634 1.23 1.28 1.39
## ENSG00000188976 1.81 1.84 1.84
## ENSG00000187961 0.73 0.78 0.70
## ENSG00000187583 0.38 1.01 0.89
## ENSG00000187642 0.14 0.71 0.64
## ENSG00000188290 0.97 0.99 0.73
## TCGA-EL-A3ZG-11A TCGA-ET-A2MX-11C TCGA-BJ-A18Z-01A
## ENSG00000187634 1.28 1.08 1.27
## ENSG00000188976 1.82 1.77 1.79
## ENSG00000187961 0.63 0.60 0.90
## ENSG00000187583 0.08 0.16 1.16
## ENSG00000187642 0.08 0.07 0.84
## ENSG00000188290 0.87 0.29 0.78
## TCGA-GE-A2C6-11A TCGA-BJ-A2N9-11A TCGA-ET-A39N-01A
## ENSG00000187634 0.93 1.14 1.35
## ENSG00000188976 1.71 1.83 1.95
## ENSG00000187961 0.55 0.67 0.56
## ENSG00000187583 0.23 0.32 0.40
## ENSG00000187642 0.16 0.14 0.28
## ENSG00000188290 0.31 0.31 0.70
## TCGA-DJ-A2QC-01A
## ENSG00000187634 1.78
## ENSG00000188976 2.07
## ENSG00000187961 0.84
## ENSG00000187583 1.10
## ENSG00000187642 0.87
## ENSG00000188290 1.12
We can now define the design to perform the DE analysis. We build a model without the intercept.
# Build a no-intercept design matrix with one indicator column per level of `group`.
desing <- model.matrix(~ 0 + group, data = edge_n$samples)
# Name the rows after the `sample` column and the columns after the group levels.
dimnames(desing) <- list(
  edge_n$samples$sample,
  levels(edge_n$samples$group)
)
desing
## case control
## TCGA-DJ-A2Q1-01A 1 0
## TCGA-H2-A2K9-11A 0 1
## TCGA-H2-A3RI-11A 0 1
## TCGA-DJ-A4V4-01A 1 0
## TCGA-EL-A3T9-01A 1 0
## TCGA-BJ-A0ZB-01A 1 0
## TCGA-EL-A3H1-01A 1 0
## TCGA-H2-A3RI-01A 1 0
## TCGA-EL-A3GR-01A 1 0
## TCGA-DJ-A3UN-01A 1 0
## TCGA-DO-A1JZ-01A 1 0
## TCGA-EL-A3GU-01A 1 0
## TCGA-DJ-A1QM-01A 1 0
## TCGA-EM-A1CT-11A 0 1
## TCGA-EL-A3CZ-01A 1 0
## TCGA-EL-A3ZS-11A 0 1
## TCGA-J8-A3YE-01A 1 0
## TCGA-ET-A2N4-01A 1 0
## TCGA-EM-A4FO-01A 1 0
## TCGA-EL-A3ZO-11A 0 1
## TCGA-ET-A4KN-01A 1 0
## TCGA-EL-A3T1-11A 0 1
## TCGA-ET-A2N5-11B 0 1
## TCGA-BJ-A2N8-11A 0 1
## TCGA-FY-A3I4-01A 1 0
## TCGA-EL-A3ZQ-11A 0 1
## TCGA-DJ-A2Q2-01A 1 0
## TCGA-EL-A3ZP-11A 0 1
## TCGA-EL-A3GQ-01A 1 0
## TCGA-EL-A3TA-11A 0 1
## TCGA-CE-A481-01A 1 0
## TCGA-E8-A438-01A 1 0
## TCGA-EL-A3H7-11A 0 1
## TCGA-EL-A3GZ-11A 0 1
## TCGA-E8-A413-01A 1 0
## TCGA-BJ-A28X-11A 0 1
## TCGA-EM-A2CS-01A 1 0
## TCGA-ET-A3BX-01A 1 0
## TCGA-DJ-A13V-01A 1 0
## TCGA-KS-A41I-11A 0 1
## TCGA-E8-A44K-01A 1 0
## TCGA-ET-A3DW-11A 0 1
## TCGA-EL-A3ZL-11A 0 1
## TCGA-EL-A4JV-01A 1 0
## TCGA-FE-A231-01A 1 0
## TCGA-BJ-A28R-11A 0 1
## TCGA-EM-A1CS-11A 0 1
## TCGA-FE-A3PA-01A 1 0
## TCGA-EM-A2CQ-01A 1 0
## TCGA-EL-A3TB-11A 0 1
## TCGA-BJ-A28W-11A 0 1
## TCGA-KS-A41J-11A 0 1
## TCGA-EL-A3T2-11A 0 1
## TCGA-KS-A41L-11A 0 1
## TCGA-EL-A3T0-11A 0 1
## TCGA-DJ-A3UP-01A 1 0
## TCGA-DJ-A4V0-01A 1 0
## TCGA-EL-A3ZK-11A 0 1
## TCGA-BJ-A3PT-01A 1 0
## TCGA-EM-A3OA-01A 1 0
## TCGA-FY-A3TY-11A 0 1
## TCGA-EL-A3MY-11A 0 1
## TCGA-FY-A3R9-01A 1 0
## TCGA-BJ-A3PR-11A 0 1
## TCGA-EL-A3H2-11A 0 1
## TCGA-ET-A2MY-01A 1 0
## TCGA-ET-A3DP-11A 0 1
## TCGA-L6-A4ET-01A 1 0
## TCGA-BJ-A3PU-11A 0 1
## TCGA-DO-A2HM-01B 1 0
## TCGA-E8-A2JQ-11A 0 1
## TCGA-EL-A3ZT-11A 0 1
## TCGA-EL-A3MW-11A 0 1
## TCGA-BJ-A28T-01A 1 0
## TCGA-DO-A1JZ-11A 0 1
## TCGA-EL-A3N3-11A 0 1
## TCGA-BJ-A290-11A 0 1
## TCGA-E8-A242-01A 1 0
## TCGA-ET-A39L-01A 1 0
## TCGA-EL-A3MY-01A 1 0
## TCGA-EL-A3T8-11A 0 1
## TCGA-EM-A1CV-11A 0 1
## TCGA-EM-A1CW-11A 0 1
## TCGA-EL-A3N2-11A 0 1
## TCGA-EL-A3T3-11A 0 1
## TCGA-BJ-A45G-01A 1 0
## TCGA-EL-A3CY-01A 1 0
## TCGA-EL-A3ZR-11A 0 1
## TCGA-EL-A3T7-11A 0 1
## TCGA-CE-A3ME-01A 1 0
## TCGA-EL-A3T6-11A 0 1
## TCGA-KS-A4ID-01A 1 0
## TCGA-ET-A3BP-01A 1 0
## TCGA-EL-A3ZG-11A 0 1
## TCGA-ET-A2MX-11C 0 1
## TCGA-BJ-A18Z-01A 1 0
## TCGA-GE-A2C6-11A 0 1
## TCGA-BJ-A2N9-11A 0 1
## TCGA-ET-A39N-01A 1 0
## TCGA-DJ-A2QC-01A 1 0
## attr(,"assign")
## [1] 1 1
## attr(,"contrasts")
## attr(,"contrasts")$group
## [1] "contr.treatment"
Then, we estimate the dispersion by passing to the command estimateDisp the object containing the normalization factors together with the design matrix. The resulting object is used to fit the data.
# Estimate the dispersions of the normalized data under the design matrix.
edge_d <- estimateDisp(
  edge_n,
  design = desing
)
View(edge_d)
We can now fit the data using glmQLFit; the p-values are then retrieved from this fit in the testing step below. The data is modeled using a negative binomial distribution.
# Fit the model to the dispersion-estimated data using the same design matrix.
edge_f <- glmQLFit(
  edge_d,
  design = desing
)
View(edge_f)
## Warning in (function (..., row.names = NULL, check.rows = FALSE, check.names =
## TRUE, : row names were found from a short variable and have been discarded
## Warning in format.data.frame(x0): corrupt data frame: columns will be truncated
## or padded with NAs
We define the contrast, which corresponds to the conditions to be compared.
# Contrast 'case-control', expressed in terms of the columns of the design matrix.
contro <- makeContrasts('case-control', levels = desing)
We then test the fitted model using the contrast created.
# Test the fitted model for the contrast; edge_t contains the results of the DE analysis.
edge_t <- glmQLFTest(
  edge_f,
  contrast = contro
)
View(edge_t)
We sort the results by fold change using the function topTags, applying a cutoff of 0.01 on the adjusted p-value.
# Collect the genes passing the 0.01 cutoff, sorted by log fold change.
# n = Inf considers every gene in edge_t rather than a hard-coded count (16748),
# so the call stays correct if upstream filtering changes the number of genes.
DEGs <- as.data.frame(
  topTags(edge_t, n = Inf, p.value = 0.01, sort.by = 'logFC')
)
View(DEGs)
We add a new column called class to the DEGs data frame, used to label each transcript according to its fold change. The selection is based on the log fold change (> 1.5 for up-regulated genes and < -1.5 for down-regulated genes) and on the log CPM (> 1 in both cases).
# Label every gene: '+' up-regulated, '-' down-regulated, '=' otherwise.
DEGs$class <- '='
DEGs$class[which(DEGs$logCPM > 1 & DEGs$logFC > 1.5)] <- '+'
DEGs$class[which(DEGs$logCPM > 1 & DEGs$logFC < (-1.5))] <- '-'
# Order by decreasing fold change; TRUE is spelled out because T can be reassigned.
DEGs <- DEGs[order(DEGs$logFC, decreasing = TRUE), ]
View(DEGs)
# Number of genes in each class.
table(DEGs$class)
##
## - + =
## 245 618 7927
Display the results using a volcano plot (x-axis: log fold change, y-axis: -log10 of the p-value). The most significant DEGs are colored in green: these are the genes that pass the thresholds set on both the p-value and the fold change. We can see that there are many DEGs, both up-regulated (right side of the plot) and down-regulated (left side of the plot).
input_df <- DEGs
# The contrast is 'case-control', so a positive log fold change means higher
# expression in case samples; the axis label states that direction.
xlabel <- "log2 FC case vs control"
ylabel <- "-log10 p-value"
par(fig = c(0, 1, 0, 1), mar = c(4, 4, 1, 2), mgp = c(2, 0.75, 0))
# Genes classified as '=' are drawn in grey, up/down-regulated genes in green.
plot(
  DEGs$logFC, -log10(DEGs$PValue),
  xlab = xlabel, ylab = ylabel,
  col = ifelse(DEGs$class == "=", "grey70", "olivedrab4"),
  pch = 20, frame.plot = TRUE, cex = 0.8, main = "Volcano plot"
)
# plot() returns NULL, so the reference line is added with a separate call
# instead of piping that NULL into abline().
abline(v = 0, lty = 2, col = "grey20")
We can also represent the genes using a heatmap, on which a clustering process is applied. We plot only the up- or down-regulated genes, using data from both the normalized CPM table and the log-transformed CPM table. Thanks to the clustering, blocks of co-expressed genes are outlined. Specifically, in the case of the log CPM table, a more refined clustering is observed, with only minor contamination at the boundary between “case” samples and “control” samples. We need to remember that contamination that took place during the collection of tissue samples can influence the clustering process. Both plots show a clear difference in expression between tumor and control samples, indicating that the expression of these genes differs between the two conditions, as expected since we are performing a DEG analysis.
# One side-bar colour per sample column. The length is taken from the table
# itself instead of a hard-coded 100, because heatmap() requires ColSideColors
# to have exactly ncol(x) entries.
# NOTE(review): assumes the rows of c_anno_df are in the same order as the
# columns of cpm_table / cpm_table_log -- confirm.
col <- rep('chartreuse4', ncol(cpm_table))
col[which(c_anno_df$condition == 'case')] <- 'burlywood3'
# Blue-white-red gradient with 100 steps.
pal <- colorRampPalette(c('blue', 'white', 'red'))(100)
# Identifiers of the genes classified as up- or down-regulated.
de_ids <- DEGs$ensembl_gene_id[DEGs$class != '=']
# Heatmap of the CPM values for those genes.
heatmap(
  as.matrix(cpm_table[rownames(cpm_table) %in% de_ids, ]),
  ColSideColors = col, cexCol = 0.5, margins = c(4, 4), col = pal, cexRow = 0.2
)
# Same heatmap on the log-transformed CPM values.
heatmap(
  as.matrix(cpm_table_log[rownames(cpm_table_log) %in% de_ids, ]),
  ColSideColors = col, cexCol = 0.5, margins = c(4, 4), col = pal, cexRow = 0.2
)
library(clusterProfiler)
##
## clusterProfiler v4.8.1 For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
##
## If you use clusterProfiler in published research, please cite:
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data. The Innovation. 2021, 2(3):100141
##
## Attaching package: 'clusterProfiler'
## The following object is masked from 'package:purrr':
##
## simplify
## The following object is masked from 'package:biomaRt':
##
## select
## The following object is masked from 'package:stats':
##
## filter
library(org.Hs.eg.db)
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following object is masked from 'package:limma':
##
## plotMA
## The following objects are masked from 'package:lubridate':
##
## intersect, setdiff, union
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:clusterProfiler':
##
## rename
## The following objects are masked from 'package:lubridate':
##
## second, second<-
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:tidyr':
##
## expand
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:clusterProfiler':
##
## slice
## The following object is masked from 'package:lubridate':
##
## %within%
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## The following object is masked from 'package:purrr':
##
## reduce
##
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:clusterProfiler':
##
## select
## The following object is masked from 'package:dplyr':
##
## select
##
We create the convert data frame, which is used to map each ensembl_gene_id to its entrezgene_id and external_gene_name.
# Query BioMart for the Entrez ID and gene symbol of every DEG.
# The argument is spelled `filters` in full rather than relying on partial
# matching of `filter`, matching the earlier getBM() call in this file.
convert <- getBM(
  attributes = c("ensembl_gene_id", "entrezgene_id", "external_gene_name"),
  filters = "ensembl_gene_id",
  values = DEGs$ensembl_gene_id,
  mart = ensembl
)
We add the information of convert in the initial file of DEGs by using the command merge.
# Join the BioMart identifiers onto the DEG table using the shared Ensembl gene ID.
DEGs <- merge(x = DEGs, y = convert, by = "ensembl_gene_id")
Then we remove the NA and the duplicates inside the new DEGs.
# Drop the genes that have no Entrez ID.
DEGs <- DEGs[!is.na(DEGs$entrezgene_id), ]
# Keep the first row for each Entrez ID. Logical negation is used instead of
# -which(duplicated(...)): when there are no duplicates, which() returns
# integer(0) and the negative index would select zero rows, emptying DEGs.
DEGs <- DEGs[!duplicated(DEGs$entrezgene_id), ]
We create a new list of only the up-regulated genes.
# Keep only the genes classified as up-regulated ('+').
UPDegs <- filter(DEGs, class == '+')
Perform Gene Ontology enrichment analysis (biological process). We use the function enrichGO, providing the list of symbols of the up-regulated genes and the annotation database of the human species (= gene model). Then we specify the desired ontology, here BP for biological process, and the method used to adjust the p-values.
# GO biological-process enrichment of the up-regulated genes, keyed by gene symbol.
ego_BP_UP <- enrichGO(
  gene = UPDegs$external_gene_name.x,
  OrgDb = org.Hs.eg.db,
  keyType = 'SYMBOL',
  ont = 'BP',
  pAdjustMethod = 'BH',
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.05
)
View(ego_BP_UP)
# Bar plot and dot plot of the first ten enriched terms.
barplot(ego_BP_UP, showCategory = 10)
dotplot(ego_BP_UP, showCategory = 10)
# Genes associated with the top two enriched terms.
heatplot(ego_BP_UP, showCategory = 2)
head(ego_BP_UP, 10)
## ID Description
## GO:0045229 GO:0045229 external encapsulating structure organization
## GO:0030198 GO:0030198 extracellular matrix organization
## GO:0043062 GO:0043062 extracellular structure organization
## GO:0030199 GO:0030199 collagen fibril organization
## GO:0050808 GO:0050808 synapse organization
## GO:0043588 GO:0043588 skin development
## GO:0034329 GO:0034329 cell junction assembly
## GO:0098742 GO:0098742 cell-cell adhesion via plasma-membrane adhesion molecules
## GO:0042060 GO:0042060 wound healing
## GO:0051960 GO:0051960 regulation of nervous system development
## GeneRatio BgRatio pvalue p.adjust qvalue
## GO:0045229 43/559 317/18614 1.028456e-16 4.371965e-13 3.746826e-13
## GO:0030198 42/559 314/18614 3.944334e-16 6.271993e-13 5.375173e-13
## GO:0043062 42/559 315/18614 4.426248e-16 6.271993e-13 5.375173e-13
## GO:0030199 18/559 64/18614 3.008218e-13 3.196984e-10 2.739853e-10
## GO:0050808 43/559 466/18614 6.797067e-11 5.778866e-08 4.952557e-08
## GO:0043588 32/559 308/18614 1.089958e-09 7.722353e-07 6.618149e-07
## GO:0034329 39/559 444/18614 2.220585e-09 1.348530e-06 1.155706e-06
## GO:0098742 29/559 278/18614 6.070815e-09 3.225879e-06 2.764617e-06
## GO:0042060 37/559 439/18614 1.738908e-08 8.213444e-06 7.039020e-06
## GO:0051960 38/559 461/18614 2.015553e-08 8.568115e-06 7.342977e-06
## geneID
## GO:0045229 COL11A1/FAP/ADAMTS2/TGM1/MMP11/COMP/HPN/TGFBR1/SPOCK2/COL1A1/ADTRP/COL7A1/QSOX1/TGFBI/COL10A1/PXDN/COL5A1/POSTN/LOXL2/ADAMTS7/MMP7/CYP1B1/LOXL4/ADAMTS14/ADAMTS12/MMP16/RUNX1/ELF3/ADAMTS9/COL1A2/SPINT1/BMP1/COL3A1/KLK7/COL8A2/TPSAB1/TMPRSS6/COL13A1/DPP4/COL5A2/PRSS1/ZNF469/PRSS2
## GO:0030198 COL11A1/FAP/ADAMTS2/MMP11/COMP/HPN/TGFBR1/SPOCK2/COL1A1/ADTRP/COL7A1/QSOX1/TGFBI/COL10A1/PXDN/COL5A1/POSTN/LOXL2/ADAMTS7/MMP7/CYP1B1/LOXL4/ADAMTS14/ADAMTS12/MMP16/RUNX1/ELF3/ADAMTS9/COL1A2/SPINT1/BMP1/COL3A1/KLK7/COL8A2/TPSAB1/TMPRSS6/COL13A1/DPP4/COL5A2/PRSS1/ZNF469/PRSS2
## GO:0043062 COL11A1/FAP/ADAMTS2/MMP11/COMP/HPN/TGFBR1/SPOCK2/COL1A1/ADTRP/COL7A1/QSOX1/TGFBI/COL10A1/PXDN/COL5A1/POSTN/LOXL2/ADAMTS7/MMP7/CYP1B1/LOXL4/ADAMTS14/ADAMTS12/MMP16/RUNX1/ELF3/ADAMTS9/COL1A2/SPINT1/BMP1/COL3A1/KLK7/COL8A2/TPSAB1/TMPRSS6/COL13A1/DPP4/COL5A2/PRSS1/ZNF469/PRSS2
## GO:0030199 COL11A1/ADAMTS2/MMP11/COMP/TGFBR1/COL1A1/PXDN/COL5A1/LOXL2/ADAMTS7/CYP1B1/LOXL4/ADAMTS14/ADAMTS12/COL1A2/BMP1/COL3A1/COL5A2
## GO:0050808 SEMA3F/PLXND1/ITGA3/TNC/CBLN4/LZTS1/NGEF/CACNB1/NRCAM/PALM/CBLN1/ICAM5/SPOCK2/GLRB/CDH6/NRP2/C3/APOE/DLG4/LRP4/PLXNC1/AMIGO2/NTRK3/TMEM108/GABRB2/SDK1/SEMA3E/CDH2/GAP43/SPTBN2/SEZ6L2/DOK7/SLITRK4/F2R/EPHB3/THBS2/SLIT1/SHISA6/LRRK2/IL1RAP/PLXNA4/ELFN1/SRCIN1
## GO:0043588 ITGA3/CDH3/NGFR/TP63/ADAMTS2/TGM1/COMP/MET/COL1A1/KRT17/COL5A1/ITGB4/LRP4/WNT10A/SCEL/POU2F3/CD109/GRHL3/IVL/CLDN1/ITGA2/FOXQ1/COL1A2/KRT80/COL3A1/SFN/ETV4/ALOX15B/GJB3/RYR1/COL5A2/CDSN
## GO:0034329 PLXND1/CBLN4/PKP2/LZTS1/CDH3/NRCAM/CBLN1/ICAM5/SPOCK2/CDH6/CLDN16/FN1/SDC4/ITGB4/LRP4/APLNR/CLDN10/AMIGO2/NTRK3/CDH11/CDH13/GABRB2/SDK1/CDH22/CLDN1/ITGA2/CLDN2/CDH2/GAP43/SPTBN2/CDH4/SLITRK4/EPHB3/GJC1/THBS2/SLIT1/GJA4/IL1RAP/CLDN9
## GO:0098742 ITGA3/TENM1/CDH3/CEACAM6/FAT2/FXYD5/ICAM1/CBLN1/MAG/CDH6/CLDN16/DSC3/CLDN10/AMIGO2/CDH11/CDH13/NECTIN4/SDK1/MPZL2/CDH22/CLDN1/CLDN2/RET/CDH2/CDH4/IL1RAP/DCHS2/CD177/CLDN9
## GO:0042060 TNFRSF12A/PLAUR/ALOX5/ENTPD2/CDH3/ERBB3/RAB27A/FAP/TYRO3/TIMP1/CCN4/COMP/TGFBR1/COL1A1/ADTRP/FGF1/PLAU/SDC4/APOE/COL5A1/EMILIN2/ENTPD1/CD109/GRHL3/TGFA/CLDN1/ITGA2/COL3A1/LRG1/SAA1/F2R/ADRA2C/PROS1/SERPINA1/F5/TAFA5/EPPK1
## GO:0051960 SEMA3F/PLXND1/TRPC5/BRINP1/E2F1/CBLN1/MAG/WNT3/CTSC/MDK/FN1/MYRF/NR1D1/DLG4/BMAL1/LRP4/HEY2/PLXNC1/AMIGO2/NTRK3/RXRG/CRABP2/HES6/TNFRSF21/CDKN2B/TIAM1/SPINT1/SEMA6B/SEMA3E/CDH4/SLITRK4/PLAG1/EPHB3/THBS2/SLIT1/IL1RAP/TGM2/PLXNA4
## Count
## GO:0045229 43
## GO:0030198 42
## GO:0043062 42
## GO:0030199 18
## GO:0050808 43
## GO:0043588 32
## GO:0034329 39
## GO:0098742 29
## GO:0042060 37
## GO:0051960 38
Perform Gene Ontology enrichment analysis (molecular function). We use the function enrichGO, providing the list of symbols of the up-regulated genes and the annotation database of the human species (= gene model). Then we specify the desired ontology, here MF for molecular function, and the method used to adjust the p-values.
# GO molecular-function enrichment of the up-regulated genes, keyed by gene symbol.
ego_MF_UP <- enrichGO(
  gene = UPDegs$external_gene_name.x,
  OrgDb = org.Hs.eg.db,
  keyType = 'SYMBOL',
  ont = 'MF',
  pAdjustMethod = 'BH',
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.05
)
View(ego_MF_UP)
# Bar plot and dot plot of the first ten enriched terms.
barplot(ego_MF_UP, showCategory = 10)
dotplot(ego_MF_UP, showCategory = 10)
# Genes associated with the top two enriched terms.
heatplot(ego_MF_UP, showCategory = 2)
head(ego_MF_UP, 10)
## ID
## GO:0005201 GO:0005201
## GO:0004252 GO:0004252
## GO:0008236 GO:0008236
## GO:0017171 GO:0017171
## GO:0005178 GO:0005178
## GO:0008201 GO:0008201
## GO:0005539 GO:0005539
## GO:0019838 GO:0019838
## GO:0030020 GO:0030020
## GO:0004175 GO:0004175
## Description
## GO:0005201 extracellular matrix structural constituent
## GO:0004252 serine-type endopeptidase activity
## GO:0008236 serine-type peptidase activity
## GO:0017171 serine hydrolase activity
## GO:0005178 integrin binding
## GO:0008201 heparin binding
## GO:0005539 glycosaminoglycan binding
## GO:0019838 growth factor binding
## GO:0030020 extracellular matrix structural constituent conferring tensile strength
## GO:0004175 endopeptidase activity
## GeneRatio BgRatio pvalue p.adjust qvalue
## GO:0005201 29/568 167/18369 3.738783e-14 2.587238e-11 2.290497e-11
## GO:0004252 24/568 170/18369 5.317169e-10 1.839741e-07 1.628733e-07
## GO:0008236 24/568 190/18369 5.196659e-09 1.168591e-06 1.034560e-06
## GO:0017171 24/568 194/18369 7.887344e-09 1.168591e-06 1.034560e-06
## GO:0005178 21/568 151/18369 8.443578e-09 1.168591e-06 1.034560e-06
## GO:0008201 22/568 168/18369 1.159636e-08 1.337447e-06 1.184050e-06
## GO:0005539 26/568 232/18369 1.525165e-08 1.507735e-06 1.334806e-06
## GO:0019838 18/568 132/18369 1.330448e-07 1.150838e-05 1.018843e-05
## GO:0030020 10/568 41/18369 3.488460e-07 2.682238e-05 2.374601e-05
## GO:0004175 34/568 428/18369 5.318010e-07 3.680063e-05 3.257981e-05
## geneID
## GO:0005201 VCAN/TNC/LTBP1/COL11A1/COMP/COL1A1/COL7A1/FN1/MFAP2/TGFBI/COL10A1/PXDN/COL5A1/EMILIN2/CHI3L1/POSTN/MFGE8/ECM1/HAPLN1/EDIL3/COL1A2/CTHRC1/COL3A1/COL8A2/THBS2/LAMB3/COL13A1/COL5A2/SPON1
## GO:0004252 FAP/MMP11/CTSH/HPN/CTSC/PLAU/PCSK2/KLK10/TMPRSS4/MMP7/PRSS23/PRSS12/KLK6/KLK11/BMP1/KLK7/TPSAB1/TMPRSS6/TPSB2/DPP4/PRSS1/CFI/HP/PRSS2
## GO:0008236 FAP/MMP11/CTSH/HPN/CTSC/PLAU/PCSK2/KLK10/TMPRSS4/MMP7/PRSS23/PRSS12/KLK6/KLK11/BMP1/KLK7/TPSAB1/TMPRSS6/TPSB2/DPP4/PRSS1/CFI/HP/PRSS2
## GO:0017171 FAP/MMP11/CTSH/HPN/CTSC/PLAU/PCSK2/KLK10/TMPRSS4/MMP7/PRSS23/PRSS12/KLK6/KLK11/BMP1/KLK7/TPSAB1/TMPRSS6/TPSB2/DPP4/PRSS1/CFI/HP/PRSS2
## GO:0005178 ITGA3/FAP/ICAM1/CCN4/ICAM4/ICAM5/COMP/THBS4/FGF1/FN1/TGFBI/COL5A1/ITGB4/ITGA11/MFGE8/ITGA2/EDIL3/ESM1/COL3A1/FRMD5/CD177
## GO:0008201 AOC1/TENM1/COL11A1/RSPO4/CCN4/COMP/MDK/THBS4/FGF1/FN1/NRP2/APOE/COL5A1/POSTN/FGFBP1/LIPH/CXCL8/SAA1/LPL/THBS2/SLIT1/COL13A1
## GO:0005539 AOC1/TENM1/VCAN/COL11A1/RSPO4/CCN4/COMP/NOD1/SPOCK2/MDK/THBS4/FGF1/FN1/NRP2/APOE/COL5A1/POSTN/FGFBP1/HAPLN1/LIPH/CXCL8/SAA1/LPL/THBS2/SLIT1/COL13A1
## GO:0019838 LTBP1/NGFR/ERBB3/TGFBR1/COL1A1/IGFBP2/NRP2/PXDN/COL5A1/ITGB4/KL/FGFBP1/NTRK3/IGFBP3/CD109/COL1A2/IGFBP6/COL3A1
## GO:0030020 COL11A1/COL1A1/COL7A1/COL10A1/COL5A1/COL1A2/COL3A1/COL8A2/COL13A1/COL5A2
## GO:0004175 FAP/ADAMTS2/MMP11/PHEX/CTSH/HPN/CTSC/ECE1/PLAU/PCSK2/KLK10/NAPSA/ADAMTS7/TMPRSS4/MMP7/ADAMTS14/ADAM12/PRSS23/ADAMTS12/MMP16/ADAMTS9/PRSS12/KLK6/KLK11/BMP1/KLK7/TPSAB1/TMPRSS6/TPSB2/DPP4/PRSS1/CFI/HP/PRSS2
## Count
## GO:0005201 29
## GO:0004252 24
## GO:0008236 24
## GO:0017171 24
## GO:0005178 21
## GO:0008201 22
## GO:0005539 26
## GO:0019838 18
## GO:0030020 10
## GO:0004175 34
We perform a pathway enrichment analysis using WikiPathways. We use the function enrichWP, which retrieves the gene lists of the WikiPathways pathways, so that we can see which pathways are most enriched among the up-regulated genes.
library(pathview)
## ##############################################################################
## Pathview is an open source software package distributed under GNU General
## Public License version 3 (GPLv3). Details of GPLv3 is available at
## http://www.gnu.org/licenses/gpl-3.0.html. Particullary, users are required to
## formally cite the original Pathview paper (not just mention it) in publications
## or products. For details, do citation("pathview") within R.
##
## The pathview downloads and uses KEGG data. Non-academic uses may require a KEGG
## license agreement (details at http://www.kegg.jp/kegg/legal.html).
## ##############################################################################
# WikiPathways enrichment of the up-regulated genes, identified by Entrez gene ID.
eWP_UP <- enrichWP(
  gene = UPDegs$entrezgene_id,
  organism = 'Homo sapiens',
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.1
)
head(eWP_UP, 10)
## ID
## WP2877 WP2877
## WP5055 WP5055
## WP558 WP558
## WP4239 WP4239
## WP4541 WP4541
## WP3859 WP3859
## WP5078 WP5078
## WP3967 WP3967
## WP1742 WP1742
## WP3624 WP3624
## Description
## WP2877 Vitamin D receptor pathway
## WP5055 Burn wound healing
## WP558 Complement and coagulation cascades
## WP4239 Epithelial to mesenchymal transition in colorectal cancer
## WP4541 Hippo-Merlin signaling dysregulation
## WP3859 TGF-beta signaling in thyroid cells for epithelial-mesenchymal transition
## WP5078 T cell modulation in pancreatic cancer
## WP3967 miR-509-3p alteration of YAP1/ECM axis
## WP1742 TP53 network
## WP3624 Lung fibrosis
## GeneRatio BgRatio pvalue p.adjust qvalue
## WP2877 23/339 187/8276 2.103724e-06 0.0008667341 0.0007972005
## WP5055 16/339 113/8276 1.307238e-05 0.0026929106 0.0024768723
## WP558 10/339 58/8276 1.050652e-04 0.0109511969 0.0100726389
## WP4239 18/339 162/8276 1.063223e-04 0.0109511969 0.0100726389
## WP4541 14/339 121/8276 4.066609e-04 0.0286490588 0.0263506928
## WP3859 5/339 17/8276 4.609168e-04 0.0286490588 0.0263506928
## WP5078 8/339 46/8276 4.867559e-04 0.0286490588 0.0263506928
## WP3967 5/339 18/8276 6.169568e-04 0.0317732742 0.0292242685
## WP1742 5/339 19/8276 8.094653e-04 0.0370555234 0.0340827502
## WP3624 9/339 64/8276 1.097255e-03 0.0452069165 0.0415801992
## geneID
## WP2877 240/1591/639/595/50486/54210/9365/3486/1030/1029/10568/6280/9075/29785/5653/147/1469/1474/283229/6275/6273/1305/6277
## WP5055 3371/50507/3383/3956/7076/10392/1277/2335/8796/9547/8038/6282/6280/1278/3576/6277
## WP558 5329/1191/5328/718/2149/5627/1604/5265/2153/3426
## WP4239 5318/7991/7046/101929777/7473/10686/2335/8828/9518/9071/80326/56649/9076/94234/9075/1000/3714/9080
## WP4541 3675/1001/4804/4233/595/1004/3691/22801/1009/1012/64405/3673/1000/1002
## WP3859 3371/1004/2335/860/1000
## WP5078 2191/3956/6367/6361/3958/79679/4907/953
## WP3967 1277/2335/1289/1281/7058
## WP1742 637/8626/27113/5366/1029
## WP3624 7076/2246/653509/5328/57105/7039/3576/729238/5265
## Count
## WP2877 23
## WP5055 16
## WP558 10
## WP4239 18
## WP4541 14
## WP3859 5
## WP5078 8
## WP3967 5
## WP1742 5
## WP3624 9
Then we proceed with the same processes but for down-regulated: So we create a new list of only the down-regulated genes.
# Keep only the genes classified as down-regulated ('-').
DWDegs <- filter(DEGs, class == '-')
# GO biological-process enrichment of the down-regulated genes, keyed by gene symbol.
ego_BP_DW <- enrichGO(
  gene = DWDegs$external_gene_name.x,
  OrgDb = org.Hs.eg.db,
  keyType = 'SYMBOL',
  ont = 'BP',
  pAdjustMethod = 'BH',
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.05
)
View(ego_BP_DW)
barplot(ego_BP_DW, showCategory = 10)
# Ordering is left at dotplot's default.
dotplot(ego_BP_DW, showCategory = 10)
heatplot(ego_BP_DW, showCategory = 2)
head(ego_BP_DW, 10)
## ID Description GeneRatio
## GO:0030198 GO:0030198 extracellular matrix organization 14/217
## GO:0043062 GO:0043062 extracellular structure organization 14/217
## GO:0045229 GO:0045229 external encapsulating structure organization 14/217
## GO:0015671 GO:0015671 oxygen transport 4/217
## GO:0019755 GO:0019755 one-carbon compound transport 6/217
## GO:0001503 GO:0001503 ossification 16/217
## GO:0015669 GO:0015669 gas transport 4/217
## GO:0097553 GO:0097553 calcium ion transmembrane import into cytosol 10/217
## BgRatio pvalue p.adjust qvalue
## GO:0030198 314/18614 2.014588e-05 0.02166984 0.01930285
## GO:0043062 315/18614 2.086855e-05 0.02166984 0.01930285
## GO:0045229 317/18614 2.238273e-05 0.02166984 0.01930285
## GO:0015671 16/18614 2.929346e-05 0.02166984 0.01930285
## GO:0019755 55/18614 4.217124e-05 0.02261553 0.02014524
## GO:0001503 429/18614 4.585778e-05 0.02261553 0.02014524
## GO:0015669 22/18614 1.114462e-04 0.04710989 0.04196410
## GO:0097553 201/18614 1.287023e-04 0.04760376 0.04240401
## geneID
## GO:0030198 COL23A1/COL9A3/SMOC2/CCN2/BMP2/MATN2/DPT/CSGALNACT1/ABI3BP/TNFRSF11B/WDR72/KLK4/COL4A6/VIT
## GO:0043062 COL23A1/COL9A3/SMOC2/CCN2/BMP2/MATN2/DPT/CSGALNACT1/ABI3BP/TNFRSF11B/WDR72/KLK4/COL4A6/VIT
## GO:0045229 COL23A1/COL9A3/SMOC2/CCN2/BMP2/MATN2/DPT/CSGALNACT1/ABI3BP/TNFRSF11B/WDR72/KLK4/COL4A6/VIT
## GO:0015671 IPCEF1/HBA2/HBA1/HBB
## GO:0019755 SLC4A4/SLC26A4/SLC26A7/HBA2/HBA1/HBB
## GO:0001503 LTF/MYOC/WNT11/CHRDL1/ZBTB16/CCN2/EGR2/BMP2/GPC3/CSGALNACT1/RANBP3L/ADGRV1/ROR2/BMP8A/RORB/GDF10
## GO:0015669 IPCEF1/HBA2/HBA1/HBB
## GO:0097553 P2RX5/PLCH1/CCN2/CCL21/ANK2/ITPR1/GRIN2C/HAP1/CD19/RYR2
## Count
## GO:0030198 14
## GO:0043062 14
## GO:0045229 14
## GO:0015671 4
## GO:0019755 6
## GO:0001503 16
## GO:0015669 4
## GO:0097553 10
# GO molecular-function enrichment of the down-regulated genes, keyed by gene symbol.
ego_MF_DW <- enrichGO(
  gene = DWDegs$external_gene_name.x,
  OrgDb = org.Hs.eg.db,
  keyType = 'SYMBOL',
  ont = 'MF',
  pAdjustMethod = 'BH',
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.05
)
View(ego_MF_DW)
barplot(ego_MF_DW, showCategory = 10)
dotplot(ego_MF_DW, showCategory = 10)
heatplot(ego_MF_DW, showCategory = 2)
head(ego_MF_DW, 10)
## ID Description
## GO:0005201 GO:0005201 extracellular matrix structural constituent
## GO:0005344 GO:0005344 oxygen carrier activity
## GO:0004714 GO:0004714 transmembrane receptor protein tyrosine kinase activity
## GO:0031720 GO:0031720 haptoglobin binding
## GO:0004713 GO:0004713 protein tyrosine kinase activity
## GO:0043177 GO:0043177 organic acid binding
## GO:0022803 GO:0022803 passive transmembrane transporter activity
## GO:0019199 GO:0019199 transmembrane receptor protein kinase activity
## GO:0004601 GO:0004601 peroxidase activity
## GO:0005539 GO:0005539 glycosaminoglycan binding
## GeneRatio BgRatio pvalue p.adjust qvalue
## GO:0005201 11/228 167/18369 7.827874e-06 0.003561683 0.003188829
## GO:0005344 4/228 14/18369 2.098801e-05 0.004774771 0.004274925
## GO:0004714 6/228 60/18369 9.795986e-05 0.014857245 0.013301917
## GO:0031720 3/228 10/18369 2.123498e-04 0.021775459 0.019495900
## GO:0004713 8/228 137/18369 3.156591e-04 0.021775459 0.019495900
## GO:0043177 10/228 212/18369 3.277564e-04 0.021775459 0.019495900
## GO:0022803 16/228 479/18369 3.350071e-04 0.021775459 0.019495900
## GO:0019199 6/228 77/18369 3.894296e-04 0.022148807 0.019830164
## GO:0004601 5/228 55/18369 5.919256e-04 0.028884076 0.025860353
## GO:0005539 10/228 232/18369 6.640351e-04 0.028884076 0.025860353
## geneID
## GO:0005201 COL23A1/COL9A3/EFEMP1/MATN2/MMRN1/FRAS1/DPT/ABI3BP/SBSPON/PODN/COL4A6
## GO:0005344 IPCEF1/HBA2/HBA1/HBB
## GO:0004714 EPHA3/EFEMP1/EPHA5/EPHB1/KIT/ROR2
## GO:0031720 HBA2/HBA1/HBB
## GO:0004713 EPHA3/RPS6KA5/EFEMP1/BLK/EPHA5/EPHB1/KIT/ROR2
## GO:0043177 AKR1C2/CRABP1/FABP4/GLDC/AKR1C1/SERPINA5/HBA2/RYR2/HBA1/HBB
## GO:0022803 KCNK2/TRPM3/P2RX5/CHRNA4/OCA2/KCNA1/KCNJ13/GJB6/SLC26A7/GPM6A/ITPR1/GRIN2C/CLCNKB/KCNIP4/RYR2/SLC5A8
## GO:0019199 EPHA3/EFEMP1/EPHA5/EPHB1/KIT/ROR2
## GO:0004601 IPCEF1/TPO/HBA2/HBA1/HBB
## GO:0005539 LTF/COL23A1/SMOC2/CCN2/LYVE1/RSPO3/PCOLCE2/SERPINA5/LAYN/VIT
## Count
## GO:0005201 11
## GO:0005344 4
## GO:0004714 6
## GO:0031720 3
## GO:0004713 8
## GO:0043177 10
## GO:0022803 16
## GO:0019199 6
## GO:0004601 5
## GO:0005539 10
# WikiPathways enrichment
# enrichWP -> runs the enrichment of the Entrez gene IDs in DWDegs against
# the WikiPathways gene sets for Homo sapiens
eWP_DW <- enrichWP(
  gene = DWDegs$entrezgene_id,
  organism = "Homo sapiens",
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.1
)
# First 10 rows of the enrichment table
head(eWP_DW, 10)
## [1] ID Description GeneRatio BgRatio pvalue p.adjust
## [7] qvalue geneID Count
## <0 rows> (or 0-length row.names)
We can use the pathview library, and specifically its pathview function, which generates a local image of the pathway of interest and highlights the DEGs it contains using specific colors. A measure of differential expression is needed; here we use the log fold change. The pathway to represent was chosen by looking at the top 10 enriched pathways from point 4: specifically, we selected the ‘Vitamin D receptor pathway’, for which we found in https://www.genome.jp/entry/hsa:7421 the corresponding KEGG pathway ID hsa04928 (Parathyroid hormone synthesis, secretion, and action).
library(pathview)
# Vector of logFC values from UPDegs, named by the matching entrezgene_id
logFC <- setNames(UPDegs$logFC, UPDegs$entrezgene_id)
# Render KEGG pathway hsa04928 using the logFC values as gene data
pathview(gene.data = logFC, pathway.id = "hsa04928", species = "human")
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory /home/andrea/Desktop/magistrale_Qcb/2master_QCB_second_semester_first_year/Bioinformatics_resource/Bioinformatics_Romanel_project-
## Info: Writing image file hsa04928.pathview.png
We chose to search for transcription factors (TFs) whose motifs are enriched in the promoters of all up-regulated genes. We first retrieved the promoter sequences using the function getSequence, specifying the promoter identifier ('gene_flank') in the parameter seqType and using a window of 500 nucleotides upstream. Then, we used the DNAStringSet function to create a DNAStringSet object, which is needed to run the motif enrichment. We can now run the enrichment and, thanks to the function groupReport, obtain the TF enrichment across all sequences.
# Query biomaRt for the 500 nt upstream flanking sequence of every gene
# symbol listed in UPDegs
Seq_up <- getSequence(
  id = UPDegs$external_gene_name.x,
  type = "hgnc_symbol",
  seqType = "gene_flank",
  upstream = 500,
  mart = ensembl
)
# gene_flank is used here to represent the promoters of the genes
View(Seq_up)
library(MotifDb) # an annotated collection of motifs
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: Biostrings
## Loading required package: XVector
##
## Attaching package: 'XVector'
## The following object is masked from 'package:purrr':
##
## compact
##
## Attaching package: 'Biostrings'
## The following object is masked from 'package:base':
##
## strsplit
## See system.file("LICENSE", package="MotifDb") for use restrictions.
# Wrap the retrieved gene_flank sequences in a DNAStringSet
new_Seq <- DNAStringSet(Seq_up[["gene_flank"]])
# Packages are attached in this order on purpose: attach order decides masking
library(PWMEnrich) # for pattern matching
library(PWMEnrich.Hsapiens.background) # package containing background models
library(seqLogo) # plotting the seqLogo of the corresponding motifs
## Loading required package: grid
##
## Attaching package: 'grid'
## The following object is masked from 'package:Biostrings':
##
## pattern
# Load the PWMLogn.hg19.MotifDb.Hsap background object into the workspace
data("PWMLogn.hg19.MotifDb.Hsap")
# Motif enrichment of the sequences against that background, using
# affinity scores.
# NOTE(review): the per-sequence "Scanning sequence i / n" messages can
# presumably be silenced with verbose = FALSE - confirm in the PWMEnrich docs.
res_initial <- motifEnrichment(
  new_Seq,
  PWMLogn.hg19.MotifDb.Hsap,
  score = "affinity"
)
## Scanning sequence 1 / 688
## Scanning sequence 2 / 688
## Scanning sequence 3 / 688
## Scanning sequence 4 / 688
## Scanning sequence 5 / 688
## Scanning sequence 6 / 688
## Scanning sequence 7 / 688
## Scanning sequence 8 / 688
## Scanning sequence 9 / 688
## Scanning sequence 10 / 688
## Scanning sequence 11 / 688
## Scanning sequence 12 / 688
## Scanning sequence 13 / 688
## Scanning sequence 14 / 688
## Scanning sequence 15 / 688
## Scanning sequence 16 / 688
## Scanning sequence 17 / 688
## Scanning sequence 18 / 688
## Scanning sequence 19 / 688
## Scanning sequence 20 / 688
## Scanning sequence 21 / 688
## Scanning sequence 22 / 688
## Scanning sequence 23 / 688
## Scanning sequence 24 / 688
## Scanning sequence 25 / 688
## Scanning sequence 26 / 688
## Scanning sequence 27 / 688
## Scanning sequence 28 / 688
## Scanning sequence 29 / 688
## Scanning sequence 30 / 688
## Scanning sequence 31 / 688
## Scanning sequence 32 / 688
## Scanning sequence 33 / 688
## Scanning sequence 34 / 688
## Scanning sequence 35 / 688
## Scanning sequence 36 / 688
## Scanning sequence 37 / 688
## Scanning sequence 38 / 688
## Scanning sequence 39 / 688
## Scanning sequence 40 / 688
## Scanning sequence 41 / 688
## Scanning sequence 42 / 688
## Scanning sequence 43 / 688
## Scanning sequence 44 / 688
## Scanning sequence 45 / 688
## Scanning sequence 46 / 688
## Scanning sequence 47 / 688
## Scanning sequence 48 / 688
## Scanning sequence 49 / 688
## Scanning sequence 50 / 688
## Scanning sequence 51 / 688
## Scanning sequence 52 / 688
## Scanning sequence 53 / 688
## Scanning sequence 54 / 688
## Scanning sequence 55 / 688
## Scanning sequence 56 / 688
## Scanning sequence 57 / 688
## Scanning sequence 58 / 688
## Scanning sequence 59 / 688
## Scanning sequence 60 / 688
## Scanning sequence 61 / 688
## Scanning sequence 62 / 688
## Scanning sequence 63 / 688
## Scanning sequence 64 / 688
## Scanning sequence 65 / 688
## Scanning sequence 66 / 688
## Scanning sequence 67 / 688
## Scanning sequence 68 / 688
## Scanning sequence 69 / 688
## Scanning sequence 70 / 688
## Scanning sequence 71 / 688
## Scanning sequence 72 / 688
## Scanning sequence 73 / 688
## Scanning sequence 74 / 688
## Scanning sequence 75 / 688
## Scanning sequence 76 / 688
## Scanning sequence 77 / 688
## Scanning sequence 78 / 688
## Scanning sequence 79 / 688
## Scanning sequence 80 / 688
## Scanning sequence 81 / 688
## Scanning sequence 82 / 688
## Scanning sequence 83 / 688
## Scanning sequence 84 / 688
## Scanning sequence 85 / 688
## Scanning sequence 86 / 688
## Scanning sequence 87 / 688
## Scanning sequence 88 / 688
## Scanning sequence 89 / 688
## Scanning sequence 90 / 688
## Scanning sequence 91 / 688
## Scanning sequence 92 / 688
## Scanning sequence 93 / 688
## Scanning sequence 94 / 688
## Scanning sequence 95 / 688
## Scanning sequence 96 / 688
## Scanning sequence 97 / 688
## Scanning sequence 98 / 688
## Scanning sequence 99 / 688
## Scanning sequence 100 / 688
## Scanning sequence 101 / 688
## Scanning sequence 102 / 688
## Scanning sequence 103 / 688
## Scanning sequence 104 / 688
## Scanning sequence 105 / 688
## Scanning sequence 106 / 688
## Scanning sequence 107 / 688
## Scanning sequence 108 / 688
## Scanning sequence 109 / 688
## Scanning sequence 110 / 688
## Scanning sequence 111 / 688
## Scanning sequence 112 / 688
## Scanning sequence 113 / 688
## Scanning sequence 114 / 688
## Scanning sequence 115 / 688
## Scanning sequence 116 / 688
## Scanning sequence 117 / 688
## Scanning sequence 118 / 688
## Scanning sequence 119 / 688
## Scanning sequence 120 / 688
## Scanning sequence 121 / 688
## Scanning sequence 122 / 688
## Scanning sequence 123 / 688
## Scanning sequence 124 / 688
## Scanning sequence 125 / 688
## Scanning sequence 126 / 688
## Scanning sequence 127 / 688
## Scanning sequence 128 / 688
## Scanning sequence 129 / 688
## Scanning sequence 130 / 688
## Scanning sequence 131 / 688
## Scanning sequence 132 / 688
## Scanning sequence 133 / 688
## Scanning sequence 134 / 688
## Scanning sequence 135 / 688
## Scanning sequence 136 / 688
## Scanning sequence 137 / 688
## Scanning sequence 138 / 688
## Scanning sequence 139 / 688
## Scanning sequence 140 / 688
## Scanning sequence 141 / 688
## Scanning sequence 142 / 688
## Scanning sequence 143 / 688
## Scanning sequence 144 / 688
## Scanning sequence 145 / 688
## Scanning sequence 146 / 688
## Scanning sequence 147 / 688
## Scanning sequence 148 / 688
## Scanning sequence 149 / 688
## Scanning sequence 150 / 688
## Scanning sequence 151 / 688
## Scanning sequence 152 / 688
## Scanning sequence 153 / 688
## Scanning sequence 154 / 688
## Scanning sequence 155 / 688
## Scanning sequence 156 / 688
## Scanning sequence 157 / 688
## Scanning sequence 158 / 688
## Scanning sequence 159 / 688
## Scanning sequence 160 / 688
## Scanning sequence 161 / 688
## Scanning sequence 162 / 688
## Scanning sequence 163 / 688
## Scanning sequence 164 / 688
## Scanning sequence 165 / 688
## Scanning sequence 166 / 688
## Scanning sequence 167 / 688
## Scanning sequence 168 / 688
## Scanning sequence 169 / 688
## Scanning sequence 170 / 688
## Scanning sequence 171 / 688
## Scanning sequence 172 / 688
## Scanning sequence 173 / 688
## Scanning sequence 174 / 688
## Scanning sequence 175 / 688
## Scanning sequence 176 / 688
## Scanning sequence 177 / 688
## Scanning sequence 178 / 688
## Scanning sequence 179 / 688
## Scanning sequence 180 / 688
## Scanning sequence 181 / 688
## Scanning sequence 182 / 688
## Scanning sequence 183 / 688
## Scanning sequence 184 / 688
## Scanning sequence 185 / 688
## Scanning sequence 186 / 688
## Scanning sequence 187 / 688
## Scanning sequence 188 / 688
## Scanning sequence 189 / 688
## Scanning sequence 190 / 688
## Scanning sequence 191 / 688
## Scanning sequence 192 / 688
## Scanning sequence 193 / 688
## Scanning sequence 194 / 688
## Scanning sequence 195 / 688
## Scanning sequence 196 / 688
## Scanning sequence 197 / 688
## Scanning sequence 198 / 688
## Scanning sequence 199 / 688
## Scanning sequence 200 / 688
## Scanning sequence 201 / 688
## Scanning sequence 202 / 688
## Scanning sequence 203 / 688
## Scanning sequence 204 / 688
## Scanning sequence 205 / 688
## Scanning sequence 206 / 688
## Scanning sequence 207 / 688
## Scanning sequence 208 / 688
## Scanning sequence 209 / 688
## Scanning sequence 210 / 688
## Scanning sequence 211 / 688
## Scanning sequence 212 / 688
## Scanning sequence 213 / 688
## Scanning sequence 214 / 688
## Scanning sequence 215 / 688
## Scanning sequence 216 / 688
## Scanning sequence 217 / 688
## Scanning sequence 218 / 688
## Scanning sequence 219 / 688
## Scanning sequence 220 / 688
## Scanning sequence 221 / 688
## Scanning sequence 222 / 688
## Scanning sequence 223 / 688
## Scanning sequence 224 / 688
## Scanning sequence 225 / 688
## Scanning sequence 226 / 688
## Scanning sequence 227 / 688
## Scanning sequence 228 / 688
## Scanning sequence 229 / 688
## Scanning sequence 230 / 688
## Scanning sequence 231 / 688
## Scanning sequence 232 / 688
## Scanning sequence 233 / 688
## Scanning sequence 234 / 688
## Scanning sequence 235 / 688
## Scanning sequence 236 / 688
## Scanning sequence 237 / 688
## Scanning sequence 238 / 688
## Scanning sequence 239 / 688
## Scanning sequence 240 / 688
## Scanning sequence 241 / 688
## Scanning sequence 242 / 688
## Scanning sequence 243 / 688
## Scanning sequence 244 / 688
## Scanning sequence 245 / 688
## Scanning sequence 246 / 688
## Scanning sequence 247 / 688
## Scanning sequence 248 / 688
## Scanning sequence 249 / 688
## Scanning sequence 250 / 688
## Scanning sequence 251 / 688
## Scanning sequence 252 / 688
## Scanning sequence 253 / 688
## Scanning sequence 254 / 688
## Scanning sequence 255 / 688
## Scanning sequence 256 / 688
## Scanning sequence 257 / 688
## Scanning sequence 258 / 688
## Scanning sequence 259 / 688
## Scanning sequence 260 / 688
## Scanning sequence 261 / 688
## Scanning sequence 262 / 688
## Scanning sequence 263 / 688
## Scanning sequence 264 / 688
## Scanning sequence 265 / 688
## Scanning sequence 266 / 688
## Scanning sequence 267 / 688
## Scanning sequence 268 / 688
## Scanning sequence 269 / 688
## Scanning sequence 270 / 688
## Scanning sequence 271 / 688
## Scanning sequence 272 / 688
## Scanning sequence 273 / 688
## Scanning sequence 274 / 688
## Scanning sequence 275 / 688
## Scanning sequence 276 / 688
## Scanning sequence 277 / 688
## Scanning sequence 278 / 688
## Scanning sequence 279 / 688
## Scanning sequence 280 / 688
## Scanning sequence 281 / 688
## Scanning sequence 282 / 688
## Scanning sequence 283 / 688
## Scanning sequence 284 / 688
## Scanning sequence 285 / 688
## Scanning sequence 286 / 688
## Scanning sequence 287 / 688
## Scanning sequence 288 / 688
## Scanning sequence 289 / 688
## Scanning sequence 290 / 688
## Scanning sequence 291 / 688
## Scanning sequence 292 / 688
## Scanning sequence 293 / 688
## Scanning sequence 294 / 688
## Scanning sequence 295 / 688
## Scanning sequence 296 / 688
## Scanning sequence 297 / 688
## Scanning sequence 298 / 688
## Scanning sequence 299 / 688
## Scanning sequence 300 / 688
## Scanning sequence 301 / 688
## Scanning sequence 302 / 688
## Scanning sequence 303 / 688
## Scanning sequence 304 / 688
## Scanning sequence 305 / 688
## Scanning sequence 306 / 688
## Scanning sequence 307 / 688
## Scanning sequence 308 / 688
## Scanning sequence 309 / 688
## Scanning sequence 310 / 688
## Scanning sequence 311 / 688
## Scanning sequence 312 / 688
## Scanning sequence 313 / 688
## Scanning sequence 314 / 688
## Scanning sequence 315 / 688
## Scanning sequence 316 / 688
## Scanning sequence 317 / 688
## Scanning sequence 318 / 688
## Scanning sequence 319 / 688
## Scanning sequence 320 / 688
## Scanning sequence 321 / 688
## Scanning sequence 322 / 688
## Scanning sequence 323 / 688
## Scanning sequence 324 / 688
## Scanning sequence 325 / 688
## Scanning sequence 326 / 688
## Scanning sequence 327 / 688
## Scanning sequence 328 / 688
## Scanning sequence 329 / 688
## Scanning sequence 330 / 688
## Scanning sequence 331 / 688
## Scanning sequence 332 / 688
## Scanning sequence 333 / 688
## Scanning sequence 334 / 688
## Scanning sequence 335 / 688
## Scanning sequence 336 / 688
## Scanning sequence 337 / 688
## Scanning sequence 338 / 688
## Scanning sequence 339 / 688
## Scanning sequence 340 / 688
## Scanning sequence 341 / 688
## Scanning sequence 342 / 688
## Scanning sequence 343 / 688
## Scanning sequence 344 / 688
## Scanning sequence 345 / 688
## Scanning sequence 346 / 688
## Scanning sequence 347 / 688
## Scanning sequence 348 / 688
## Scanning sequence 349 / 688
## Scanning sequence 350 / 688
## Scanning sequence 351 / 688
## Scanning sequence 352 / 688
## Scanning sequence 353 / 688
## Scanning sequence 354 / 688
## Scanning sequence 355 / 688
## Scanning sequence 356 / 688
## Scanning sequence 357 / 688
## Scanning sequence 358 / 688
## Scanning sequence 359 / 688
## Scanning sequence 360 / 688
## Scanning sequence 361 / 688
## Scanning sequence 362 / 688
## Scanning sequence 363 / 688
## Scanning sequence 364 / 688
## Scanning sequence 365 / 688
## Scanning sequence 366 / 688
## Scanning sequence 367 / 688
## Scanning sequence 368 / 688
## Scanning sequence 369 / 688
## Scanning sequence 370 / 688
## Scanning sequence 371 / 688
## Scanning sequence 372 / 688
## Scanning sequence 373 / 688
## Scanning sequence 374 / 688
## Scanning sequence 375 / 688
## Scanning sequence 376 / 688
## Scanning sequence 377 / 688
## Scanning sequence 378 / 688
## Scanning sequence 379 / 688
## Scanning sequence 380 / 688
## Scanning sequence 381 / 688
## Scanning sequence 382 / 688
## Scanning sequence 383 / 688
## Scanning sequence 384 / 688
## Scanning sequence 385 / 688
## Scanning sequence 386 / 688
## Scanning sequence 387 / 688
## Scanning sequence 388 / 688
## Scanning sequence 389 / 688
## Scanning sequence 390 / 688
## Scanning sequence 391 / 688
## Scanning sequence 392 / 688
## Scanning sequence 393 / 688
## Scanning sequence 394 / 688
## Scanning sequence 395 / 688
## Scanning sequence 396 / 688
## Scanning sequence 397 / 688
## Scanning sequence 398 / 688
## Scanning sequence 399 / 688
## Scanning sequence 400 / 688
## Scanning sequence 401 / 688
## Scanning sequence 402 / 688
## Scanning sequence 403 / 688
## Scanning sequence 404 / 688
## Scanning sequence 405 / 688
## Scanning sequence 406 / 688
## Scanning sequence 407 / 688
## Scanning sequence 408 / 688
## Scanning sequence 409 / 688
## Scanning sequence 410 / 688
## Scanning sequence 411 / 688
## Scanning sequence 412 / 688
## Scanning sequence 413 / 688
## Scanning sequence 414 / 688
## Scanning sequence 415 / 688
## Scanning sequence 416 / 688
## Scanning sequence 417 / 688
## Scanning sequence 418 / 688
## Scanning sequence 419 / 688
## Scanning sequence 420 / 688
## Scanning sequence 421 / 688
## Scanning sequence 422 / 688
## Scanning sequence 423 / 688
## Scanning sequence 424 / 688
## Scanning sequence 425 / 688
## Scanning sequence 426 / 688
## Scanning sequence 427 / 688
## Scanning sequence 428 / 688
## Scanning sequence 429 / 688
## Scanning sequence 430 / 688
## Scanning sequence 431 / 688
## Scanning sequence 432 / 688
## Scanning sequence 433 / 688
## Scanning sequence 434 / 688
## Scanning sequence 435 / 688
## Scanning sequence 436 / 688
## Scanning sequence 437 / 688
## Scanning sequence 438 / 688
## Scanning sequence 439 / 688
## Scanning sequence 440 / 688
## Scanning sequence 441 / 688
## Scanning sequence 442 / 688
## Scanning sequence 443 / 688
## Scanning sequence 444 / 688
## Scanning sequence 445 / 688
## Scanning sequence 446 / 688
## Scanning sequence 447 / 688
## Scanning sequence 448 / 688
## Scanning sequence 449 / 688
## Scanning sequence 450 / 688
## Scanning sequence 451 / 688
## Scanning sequence 452 / 688
## Scanning sequence 453 / 688
## Scanning sequence 454 / 688
## Scanning sequence 455 / 688
## Scanning sequence 456 / 688
## Scanning sequence 457 / 688
## Scanning sequence 458 / 688
## Scanning sequence 459 / 688
## Scanning sequence 460 / 688
## Scanning sequence 461 / 688
## Scanning sequence 462 / 688
## Scanning sequence 463 / 688
## Scanning sequence 464 / 688
## Scanning sequence 465 / 688
## Scanning sequence 466 / 688
## Scanning sequence 467 / 688
## Scanning sequence 468 / 688
## Scanning sequence 469 / 688
## Scanning sequence 470 / 688
## Scanning sequence 471 / 688
## Scanning sequence 472 / 688
## Scanning sequence 473 / 688
## Scanning sequence 474 / 688
## Scanning sequence 475 / 688
## Scanning sequence 476 / 688
## Scanning sequence 477 / 688
## Scanning sequence 478 / 688
## Scanning sequence 479 / 688
## Scanning sequence 480 / 688
## Scanning sequence 481 / 688
## Scanning sequence 482 / 688
## Scanning sequence 483 / 688
## Scanning sequence 484 / 688
## Scanning sequence 485 / 688
## Scanning sequence 486 / 688
## Scanning sequence 487 / 688
## Scanning sequence 488 / 688
## Scanning sequence 489 / 688
## Scanning sequence 490 / 688
## Scanning sequence 491 / 688
## Scanning sequence 492 / 688
## Scanning sequence 493 / 688
## Scanning sequence 494 / 688
## Scanning sequence 495 / 688
## Scanning sequence 496 / 688
## Scanning sequence 497 / 688
## Scanning sequence 498 / 688
## Scanning sequence 499 / 688
## Scanning sequence 500 / 688
## Scanning sequence 501 / 688
## Scanning sequence 502 / 688
## Scanning sequence 503 / 688
## Scanning sequence 504 / 688
## Scanning sequence 505 / 688
## Scanning sequence 506 / 688
## Scanning sequence 507 / 688
## Scanning sequence 508 / 688
## Scanning sequence 509 / 688
## Scanning sequence 510 / 688
## Scanning sequence 511 / 688
## Scanning sequence 512 / 688
## Scanning sequence 513 / 688
## Scanning sequence 514 / 688
## Scanning sequence 515 / 688
## Scanning sequence 516 / 688
## Scanning sequence 517 / 688
## Scanning sequence 518 / 688
## Scanning sequence 519 / 688
## Scanning sequence 520 / 688
## Scanning sequence 521 / 688
## Scanning sequence 522 / 688
## Scanning sequence 523 / 688
## Scanning sequence 524 / 688
## Scanning sequence 525 / 688
## Scanning sequence 526 / 688
## Scanning sequence 527 / 688
## Scanning sequence 528 / 688
## Scanning sequence 529 / 688
## Scanning sequence 530 / 688
## Scanning sequence 531 / 688
## Scanning sequence 532 / 688
## Scanning sequence 533 / 688
## Scanning sequence 534 / 688
## Scanning sequence 535 / 688
## Scanning sequence 536 / 688
## Scanning sequence 537 / 688
## Scanning sequence 538 / 688
## Scanning sequence 539 / 688
## Scanning sequence 540 / 688
## Scanning sequence 541 / 688
## Scanning sequence 542 / 688
## Scanning sequence 543 / 688
## Scanning sequence 544 / 688
## Scanning sequence 545 / 688
## Scanning sequence 546 / 688
## Scanning sequence 547 / 688
## Scanning sequence 548 / 688
## Scanning sequence 549 / 688
## Scanning sequence 550 / 688
## Scanning sequence 551 / 688
## Scanning sequence 552 / 688
## Scanning sequence 553 / 688
## Scanning sequence 554 / 688
## Scanning sequence 555 / 688
## Scanning sequence 556 / 688
## Scanning sequence 557 / 688
## Scanning sequence 558 / 688
## Scanning sequence 559 / 688
## Scanning sequence 560 / 688
## Scanning sequence 561 / 688
## Scanning sequence 562 / 688
## Scanning sequence 563 / 688
## Scanning sequence 564 / 688
## Scanning sequence 565 / 688
## Scanning sequence 566 / 688
## Scanning sequence 567 / 688
## Scanning sequence 568 / 688
## Scanning sequence 569 / 688
## Scanning sequence 570 / 688
## Scanning sequence 571 / 688
## Scanning sequence 572 / 688
## Scanning sequence 573 / 688
## Scanning sequence 574 / 688
## Scanning sequence 575 / 688
## Scanning sequence 576 / 688
## Scanning sequence 577 / 688
## Scanning sequence 578 / 688
## Scanning sequence 579 / 688
## Scanning sequence 580 / 688
## Scanning sequence 581 / 688
## Scanning sequence 582 / 688
## Scanning sequence 583 / 688
## Scanning sequence 584 / 688
## Scanning sequence 585 / 688
## Scanning sequence 586 / 688
## Scanning sequence 587 / 688
## Scanning sequence 588 / 688
## Scanning sequence 589 / 688
## Scanning sequence 590 / 688
## Scanning sequence 591 / 688
## Scanning sequence 592 / 688
## Scanning sequence 593 / 688
## Scanning sequence 594 / 688
## Scanning sequence 595 / 688
## Scanning sequence 596 / 688
## Scanning sequence 597 / 688
## Scanning sequence 598 / 688
## Scanning sequence 599 / 688
## Scanning sequence 600 / 688
## Scanning sequence 601 / 688
## Scanning sequence 602 / 688
## Scanning sequence 603 / 688
## Scanning sequence 604 / 688
## Scanning sequence 605 / 688
## Scanning sequence 606 / 688
## Scanning sequence 607 / 688
## Scanning sequence 608 / 688
## Scanning sequence 609 / 688
## Scanning sequence 610 / 688
## Scanning sequence 611 / 688
## Scanning sequence 612 / 688
## Scanning sequence 613 / 688
## Scanning sequence 614 / 688
## Scanning sequence 615 / 688
## Scanning sequence 616 / 688
## Scanning sequence 617 / 688
## Scanning sequence 618 / 688
## Scanning sequence 619 / 688
## Scanning sequence 620 / 688
## Scanning sequence 621 / 688
## Scanning sequence 622 / 688
## Scanning sequence 623 / 688
## Scanning sequence 624 / 688
## Scanning sequence 625 / 688
## Scanning sequence 626 / 688
## Scanning sequence 627 / 688
## Scanning sequence 628 / 688
## Scanning sequence 629 / 688
## Scanning sequence 630 / 688
## Scanning sequence 631 / 688
## Scanning sequence 632 / 688
## Scanning sequence 633 / 688
## Scanning sequence 634 / 688
## Scanning sequence 635 / 688
## Scanning sequence 636 / 688
## Scanning sequence 637 / 688
## Scanning sequence 638 / 688
## Scanning sequence 639 / 688
## Scanning sequence 640 / 688
## Scanning sequence 641 / 688
## Scanning sequence 642 / 688
## Scanning sequence 643 / 688
## Scanning sequence 644 / 688
## Scanning sequence 645 / 688
## Scanning sequence 646 / 688
## Scanning sequence 647 / 688
## Scanning sequence 648 / 688
## Scanning sequence 649 / 688
## Scanning sequence 650 / 688
## Scanning sequence 651 / 688
## Scanning sequence 652 / 688
## Scanning sequence 653 / 688
## Scanning sequence 654 / 688
## Scanning sequence 655 / 688
## Scanning sequence 656 / 688
## Scanning sequence 657 / 688
## Scanning sequence 658 / 688
## Scanning sequence 659 / 688
## Scanning sequence 660 / 688
## Scanning sequence 661 / 688
## Scanning sequence 662 / 688
## Scanning sequence 663 / 688
## Scanning sequence 664 / 688
## Scanning sequence 665 / 688
## Scanning sequence 666 / 688
## Scanning sequence 667 / 688
## Scanning sequence 668 / 688
## Scanning sequence 669 / 688
## Scanning sequence 670 / 688
## Scanning sequence 671 / 688
## Scanning sequence 672 / 688
## Scanning sequence 673 / 688
## Scanning sequence 674 / 688
## Scanning sequence 675 / 688
## Scanning sequence 676 / 688
## Scanning sequence 677 / 688
## Scanning sequence 678 / 688
## Scanning sequence 679 / 688
## Scanning sequence 680 / 688
## Scanning sequence 681 / 688
## Scanning sequence 682 / 688
## Scanning sequence 683 / 688
## Scanning sequence 684 / 688
## Scanning sequence 685 / 688
## Scanning sequence 686 / 688
## Scanning sequence 687 / 688
## Scanning sequence 688 / 688
## Calculating motif enrichment scores ...
# Build the group-level enrichment report from the motifEnrichment result
TF_enrichment <- groupReport(res_initial)
# Bare name at top level: relies on auto-printing to display the report
TF_enrichment
## An object of class 'MotifEnrichmentReport':
## rank target id raw.score p.value
## 1 1 PDLIM5 PDLIM5 4.03024318240181 6.93435108583786e-75
## 2 2 GPD1 GPD1 6.28306364825295 9.89287412384532e-74
## 3 3 JUN M4591_1.02 3.07552583607849 1.46079776308636e-73
## 4 4 JUNB M4623_1.02 3.09492858866266 1.1446527158344e-72
## 5 5 CEBPB M4556_1.02 2.5085205500081 1.65649558189844e-70
## 6 6 MAFK M4573_1.02 2.6564802703635 1.95372765047626e-70
## 7 7 ZDHHC5 ZDHHC5 2.56104904937184 4.51981324514863e-70
## 8 8 AHR M2917_1.02 3.6904461952882 6.41348098968778e-70
## 9 9 SREBF1 M2387_1.02 1.50928254592526 1.8905870707718e-69
## 10 10 MAFF M4572_1.02 2.52431418094361 2.13168795000711e-69
## ... ... ... ... ... ...
## 2287 2019.5 Oct-1 NBT06/Oct-1.pwm 1.43240607704683 1
## top.motif.prop
## 1 0.210755813953488
## 2 0.206395348837209
## 3 0.206395348837209
## 4 0.196220930232558
## 5 0.22093023255814
## 6 0.213662790697674
## 7 0.199127906976744
## 8 0.209302325581395
## 9 0.17296511627907
## 10 0.204941860465116
## ... ...
## 2287 0.0247093023255814
# Plot only the first five entries of the enrichment report
plot(TF_enrichment[seq_len(5)])
We chose to analyze the first TF among the top enriched ones. We create an object containing the metadata related to the chosen TF and the corresponding PPM using the function query(). We use the PPM to compute the PWM and then the empirical distribution of scores, using the function motifEcdf. Finally, we determined the threshold cutoff at 99.75% by applying the quantile function to the ECDF and specifying the desired quantile (here 1 - 25e-4).
# Pick the top-ranked target of the enrichment report
tfs <- TF_enrichment$target[1]
# Look up the motif record(s) matching that target in MotifDb
tfmotif <- query(MotifDb, tfs)
# Fail early with a clear message instead of an obscure downstream error
# when the query matches nothing
if (length(tfmotif) == 0) {
  stop("No MotifDb entry found for target: ", tfs, call. = FALSE)
}
# Convert the retrieved motif matrices to PWMs
PWM_tfs <- toPWM(as.list(tfmotif))
# Empirical score distribution per motif on the hg19 background.
# TRUE is spelled out because the shorthand T can be reassigned.
ecdf_tfs <- motifEcdf(PWM_tfs, organism = "hg19", quick = TRUE)
## Starting scanning with motif Hsapiens-hPDI-PDLIM5
## Scanning all sequences with motif 1 / 1
# Score cutoff: log2 of the 99.75% quantile (1 - 25e-4) of the motif's
# score ECDF.
# The ECDF is taken by position rather than by a hard-coded motif name, so
# this keeps working when a different TF ends up top-ranked. It uses the
# first ECDF in the list; the run shown here scanned a single motif.
# NOTE: the spelling `threshlod` is kept because later code refers to it.
threshlod <- log2(quantile(ecdf_tfs[[1]], 1 - 25e-4))
# plot(ecdf_tfs[[1]](v = knots(ecdf_tfs[[1]])),main = 'ecdf distribution')
threshlod
## 99.75%
## 9.048089
We first compute the scores using the motifScores function. For each PWM we apply the cutoff and obtain a matrix in which the rows represent the promoters and the columns the motifs. Here we obtain a 688 x 1 matrix, since we are investigating a single motif (the one of the TF selected in the point above). Each matrix cell holds a score representing the match of the TF motif to the promoter. We then evaluate how well the TF binds the promoters by calculating the frequency of the matches that are above a fixed threshold (the 99.75% threshold computed in the previous point).
# Scan the sequences with the selected PWM(s), applying the cutoff computed
# above. FALSE is spelled out because the shorthand F can be reassigned.
score <- motifScores(
  new_Seq,
  PWM_tfs,
  raw.scores = FALSE,
  cutoff = threshlod
)
## Scanning sequence 1 / 688
## Scanning sequence 2 / 688
## Scanning sequence 3 / 688
## Scanning sequence 4 / 688
## Scanning sequence 5 / 688
## Scanning sequence 6 / 688
## Scanning sequence 7 / 688
## Scanning sequence 8 / 688
## Scanning sequence 9 / 688
## Scanning sequence 10 / 688
## Scanning sequence 11 / 688
## Scanning sequence 12 / 688
## Scanning sequence 13 / 688
## Scanning sequence 14 / 688
## Scanning sequence 15 / 688
## Scanning sequence 16 / 688
## Scanning sequence 17 / 688
## Scanning sequence 18 / 688
## Scanning sequence 19 / 688
## Scanning sequence 20 / 688
## Scanning sequence 21 / 688
## Scanning sequence 22 / 688
## Scanning sequence 23 / 688
## Scanning sequence 24 / 688
## Scanning sequence 25 / 688
## Scanning sequence 26 / 688
## Scanning sequence 27 / 688
## Scanning sequence 28 / 688
## Scanning sequence 29 / 688
## Scanning sequence 30 / 688
## Scanning sequence 31 / 688
## Scanning sequence 32 / 688
## Scanning sequence 33 / 688
## Scanning sequence 34 / 688
## Scanning sequence 35 / 688
## Scanning sequence 36 / 688
## Scanning sequence 37 / 688
## Scanning sequence 38 / 688
## Scanning sequence 39 / 688
## Scanning sequence 40 / 688
## Scanning sequence 41 / 688
## Scanning sequence 42 / 688
## Scanning sequence 43 / 688
## Scanning sequence 44 / 688
## Scanning sequence 45 / 688
## Scanning sequence 46 / 688
## Scanning sequence 47 / 688
## Scanning sequence 48 / 688
## Scanning sequence 49 / 688
## Scanning sequence 50 / 688
## Scanning sequence 51 / 688
## Scanning sequence 52 / 688
## Scanning sequence 53 / 688
## Scanning sequence 54 / 688
## Scanning sequence 55 / 688
## Scanning sequence 56 / 688
## Scanning sequence 57 / 688
## Scanning sequence 58 / 688
## Scanning sequence 59 / 688
## Scanning sequence 60 / 688
## Scanning sequence 61 / 688
## Scanning sequence 62 / 688
## Scanning sequence 63 / 688
## Scanning sequence 64 / 688
## Scanning sequence 65 / 688
## Scanning sequence 66 / 688
## Scanning sequence 67 / 688
## Scanning sequence 68 / 688
## Scanning sequence 69 / 688
## Scanning sequence 70 / 688
## Scanning sequence 71 / 688
## Scanning sequence 72 / 688
## Scanning sequence 73 / 688
## Scanning sequence 74 / 688
## Scanning sequence 75 / 688
## Scanning sequence 76 / 688
## Scanning sequence 77 / 688
## Scanning sequence 78 / 688
## Scanning sequence 79 / 688
## Scanning sequence 80 / 688
## Scanning sequence 81 / 688
## Scanning sequence 82 / 688
## Scanning sequence 83 / 688
## Scanning sequence 84 / 688
## Scanning sequence 85 / 688
## Scanning sequence 86 / 688
## Scanning sequence 87 / 688
## Scanning sequence 88 / 688
## Scanning sequence 89 / 688
## Scanning sequence 90 / 688
## Scanning sequence 91 / 688
## Scanning sequence 92 / 688
## Scanning sequence 93 / 688
## Scanning sequence 94 / 688
## Scanning sequence 95 / 688
## Scanning sequence 96 / 688
## Scanning sequence 97 / 688
## Scanning sequence 98 / 688
## Scanning sequence 99 / 688
## Scanning sequence 100 / 688
## Scanning sequence 101 / 688
## Scanning sequence 102 / 688
## Scanning sequence 103 / 688
## Scanning sequence 104 / 688
## Scanning sequence 105 / 688
## Scanning sequence 106 / 688
## Scanning sequence 107 / 688
## Scanning sequence 108 / 688
## Scanning sequence 109 / 688
## Scanning sequence 110 / 688
## Scanning sequence 111 / 688
## Scanning sequence 112 / 688
## Scanning sequence 113 / 688
## Scanning sequence 114 / 688
## Scanning sequence 115 / 688
## Scanning sequence 116 / 688
## Scanning sequence 117 / 688
## Scanning sequence 118 / 688
## Scanning sequence 119 / 688
## Scanning sequence 120 / 688
## Scanning sequence 121 / 688
## Scanning sequence 122 / 688
## Scanning sequence 123 / 688
## Scanning sequence 124 / 688
## Scanning sequence 125 / 688
## Scanning sequence 126 / 688
## Scanning sequence 127 / 688
## Scanning sequence 128 / 688
## Scanning sequence 129 / 688
## Scanning sequence 130 / 688
## Scanning sequence 131 / 688
## Scanning sequence 132 / 688
## Scanning sequence 133 / 688
## Scanning sequence 134 / 688
## Scanning sequence 135 / 688
## Scanning sequence 136 / 688
## Scanning sequence 137 / 688
## Scanning sequence 138 / 688
## Scanning sequence 139 / 688
## Scanning sequence 140 / 688
## Scanning sequence 141 / 688
## Scanning sequence 142 / 688
## Scanning sequence 143 / 688
## Scanning sequence 144 / 688
## Scanning sequence 145 / 688
## Scanning sequence 146 / 688
## Scanning sequence 147 / 688
## Scanning sequence 148 / 688
## Scanning sequence 149 / 688
## Scanning sequence 150 / 688
## Scanning sequence 151 / 688
## Scanning sequence 152 / 688
## Scanning sequence 153 / 688
## Scanning sequence 154 / 688
## Scanning sequence 155 / 688
## Scanning sequence 156 / 688
## Scanning sequence 157 / 688
## Scanning sequence 158 / 688
## Scanning sequence 159 / 688
## Scanning sequence 160 / 688
## Scanning sequence 161 / 688
## Scanning sequence 162 / 688
## Scanning sequence 163 / 688
## Scanning sequence 164 / 688
## Scanning sequence 165 / 688
## Scanning sequence 166 / 688
## Scanning sequence 167 / 688
## Scanning sequence 168 / 688
## Scanning sequence 169 / 688
## Scanning sequence 170 / 688
## Scanning sequence 171 / 688
## Scanning sequence 172 / 688
## Scanning sequence 173 / 688
## Scanning sequence 174 / 688
## Scanning sequence 175 / 688
## Scanning sequence 176 / 688
## Scanning sequence 177 / 688
## Scanning sequence 178 / 688
## Scanning sequence 179 / 688
## Scanning sequence 180 / 688
## Scanning sequence 181 / 688
## Scanning sequence 182 / 688
## Scanning sequence 183 / 688
## Scanning sequence 184 / 688
## Scanning sequence 185 / 688
## Scanning sequence 186 / 688
## Scanning sequence 187 / 688
## Scanning sequence 188 / 688
## Scanning sequence 189 / 688
## Scanning sequence 190 / 688
## Scanning sequence 191 / 688
## Scanning sequence 192 / 688
## Scanning sequence 193 / 688
## Scanning sequence 194 / 688
## Scanning sequence 195 / 688
## Scanning sequence 196 / 688
## Scanning sequence 197 / 688
## Scanning sequence 198 / 688
## Scanning sequence 199 / 688
## Scanning sequence 200 / 688
## Scanning sequence 201 / 688
## Scanning sequence 202 / 688
## Scanning sequence 203 / 688
## Scanning sequence 204 / 688
## Scanning sequence 205 / 688
## Scanning sequence 206 / 688
## Scanning sequence 207 / 688
## Scanning sequence 208 / 688
## Scanning sequence 209 / 688
## Scanning sequence 210 / 688
## Scanning sequence 211 / 688
## Scanning sequence 212 / 688
## Scanning sequence 213 / 688
## Scanning sequence 214 / 688
## Scanning sequence 215 / 688
## Scanning sequence 216 / 688
## Scanning sequence 217 / 688
## Scanning sequence 218 / 688
## Scanning sequence 219 / 688
## Scanning sequence 220 / 688
## Scanning sequence 221 / 688
## Scanning sequence 222 / 688
## Scanning sequence 223 / 688
## Scanning sequence 224 / 688
## Scanning sequence 225 / 688
## Scanning sequence 226 / 688
## Scanning sequence 227 / 688
## Scanning sequence 228 / 688
## Scanning sequence 229 / 688
## Scanning sequence 230 / 688
## Scanning sequence 231 / 688
## Scanning sequence 232 / 688
## Scanning sequence 233 / 688
## Scanning sequence 234 / 688
## Scanning sequence 235 / 688
## Scanning sequence 236 / 688
## Scanning sequence 237 / 688
## Scanning sequence 238 / 688
## Scanning sequence 239 / 688
## Scanning sequence 240 / 688
## Scanning sequence 241 / 688
## Scanning sequence 242 / 688
## Scanning sequence 243 / 688
## Scanning sequence 244 / 688
## Scanning sequence 245 / 688
## Scanning sequence 246 / 688
## Scanning sequence 247 / 688
## Scanning sequence 248 / 688
## Scanning sequence 249 / 688
## Scanning sequence 250 / 688
## Scanning sequence 251 / 688
## Scanning sequence 252 / 688
## Scanning sequence 253 / 688
## Scanning sequence 254 / 688
## Scanning sequence 255 / 688
## Scanning sequence 256 / 688
## Scanning sequence 257 / 688
## Scanning sequence 258 / 688
## Scanning sequence 259 / 688
## Scanning sequence 260 / 688
## Scanning sequence 261 / 688
## Scanning sequence 262 / 688
## Scanning sequence 263 / 688
## Scanning sequence 264 / 688
## Scanning sequence 265 / 688
## Scanning sequence 266 / 688
## Scanning sequence 267 / 688
## Scanning sequence 268 / 688
## Scanning sequence 269 / 688
## Scanning sequence 270 / 688
## Scanning sequence 271 / 688
## Scanning sequence 272 / 688
## Scanning sequence 273 / 688
## Scanning sequence 274 / 688
## Scanning sequence 275 / 688
## Scanning sequence 276 / 688
## Scanning sequence 277 / 688
## Scanning sequence 278 / 688
## Scanning sequence 279 / 688
## Scanning sequence 280 / 688
## Scanning sequence 281 / 688
## Scanning sequence 282 / 688
## Scanning sequence 283 / 688
## Scanning sequence 284 / 688
## Scanning sequence 285 / 688
## Scanning sequence 286 / 688
## Scanning sequence 287 / 688
## Scanning sequence 288 / 688
## Scanning sequence 289 / 688
## Scanning sequence 290 / 688
## Scanning sequence 291 / 688
## Scanning sequence 292 / 688
## Scanning sequence 293 / 688
## Scanning sequence 294 / 688
## Scanning sequence 295 / 688
## Scanning sequence 296 / 688
## Scanning sequence 297 / 688
## Scanning sequence 298 / 688
## Scanning sequence 299 / 688
## Scanning sequence 300 / 688
## Scanning sequence 301 / 688
## Scanning sequence 302 / 688
## Scanning sequence 303 / 688
## Scanning sequence 304 / 688
## Scanning sequence 305 / 688
## Scanning sequence 306 / 688
## Scanning sequence 307 / 688
## Scanning sequence 308 / 688
## Scanning sequence 309 / 688
## Scanning sequence 310 / 688
## Scanning sequence 311 / 688
## Scanning sequence 312 / 688
## Scanning sequence 313 / 688
## Scanning sequence 314 / 688
## Scanning sequence 315 / 688
## Scanning sequence 316 / 688
## Scanning sequence 317 / 688
## Scanning sequence 318 / 688
## Scanning sequence 319 / 688
## Scanning sequence 320 / 688
## Scanning sequence 321 / 688
## Scanning sequence 322 / 688
## Scanning sequence 323 / 688
## Scanning sequence 324 / 688
## Scanning sequence 325 / 688
## Scanning sequence 326 / 688
## Scanning sequence 327 / 688
## Scanning sequence 328 / 688
## Scanning sequence 329 / 688
## Scanning sequence 330 / 688
## Scanning sequence 331 / 688
## Scanning sequence 332 / 688
## Scanning sequence 333 / 688
## Scanning sequence 334 / 688
## Scanning sequence 335 / 688
## Scanning sequence 336 / 688
## Scanning sequence 337 / 688
## Scanning sequence 338 / 688
## Scanning sequence 339 / 688
## Scanning sequence 340 / 688
## Scanning sequence 341 / 688
## Scanning sequence 342 / 688
## Scanning sequence 343 / 688
## Scanning sequence 344 / 688
## Scanning sequence 345 / 688
## Scanning sequence 346 / 688
## Scanning sequence 347 / 688
## Scanning sequence 348 / 688
## Scanning sequence 349 / 688
## Scanning sequence 350 / 688
## Scanning sequence 351 / 688
## Scanning sequence 352 / 688
## Scanning sequence 353 / 688
## Scanning sequence 354 / 688
## Scanning sequence 355 / 688
## Scanning sequence 356 / 688
## Scanning sequence 357 / 688
## Scanning sequence 358 / 688
## Scanning sequence 359 / 688
## Scanning sequence 360 / 688
## Scanning sequence 361 / 688
## Scanning sequence 362 / 688
## Scanning sequence 363 / 688
## Scanning sequence 364 / 688
## Scanning sequence 365 / 688
## Scanning sequence 366 / 688
## Scanning sequence 367 / 688
## Scanning sequence 368 / 688
## Scanning sequence 369 / 688
## Scanning sequence 370 / 688
## Scanning sequence 371 / 688
## Scanning sequence 372 / 688
## Scanning sequence 373 / 688
## Scanning sequence 374 / 688
## Scanning sequence 375 / 688
## Scanning sequence 376 / 688
## Scanning sequence 377 / 688
## Scanning sequence 378 / 688
## Scanning sequence 379 / 688
## Scanning sequence 380 / 688
## Scanning sequence 381 / 688
## Scanning sequence 382 / 688
## Scanning sequence 383 / 688
## Scanning sequence 384 / 688
## Scanning sequence 385 / 688
## Scanning sequence 386 / 688
## Scanning sequence 387 / 688
## Scanning sequence 388 / 688
## Scanning sequence 389 / 688
## Scanning sequence 390 / 688
## Scanning sequence 391 / 688
## Scanning sequence 392 / 688
## Scanning sequence 393 / 688
## Scanning sequence 394 / 688
## Scanning sequence 395 / 688
## Scanning sequence 396 / 688
## Scanning sequence 397 / 688
## Scanning sequence 398 / 688
## Scanning sequence 399 / 688
## Scanning sequence 400 / 688
## Scanning sequence 401 / 688
## Scanning sequence 402 / 688
## Scanning sequence 403 / 688
## Scanning sequence 404 / 688
## Scanning sequence 405 / 688
## Scanning sequence 406 / 688
## Scanning sequence 407 / 688
## Scanning sequence 408 / 688
## Scanning sequence 409 / 688
## Scanning sequence 410 / 688
## Scanning sequence 411 / 688
## Scanning sequence 412 / 688
## Scanning sequence 413 / 688
## Scanning sequence 414 / 688
## Scanning sequence 415 / 688
## Scanning sequence 416 / 688
## Scanning sequence 417 / 688
## Scanning sequence 418 / 688
## Scanning sequence 419 / 688
## Scanning sequence 420 / 688
## Scanning sequence 421 / 688
## Scanning sequence 422 / 688
## Scanning sequence 423 / 688
## Scanning sequence 424 / 688
## Scanning sequence 425 / 688
## Scanning sequence 426 / 688
## Scanning sequence 427 / 688
## Scanning sequence 428 / 688
## Scanning sequence 429 / 688
## Scanning sequence 430 / 688
## Scanning sequence 431 / 688
## Scanning sequence 432 / 688
## Scanning sequence 433 / 688
## Scanning sequence 434 / 688
## Scanning sequence 435 / 688
## Scanning sequence 436 / 688
## Scanning sequence 437 / 688
## Scanning sequence 438 / 688
## Scanning sequence 439 / 688
## Scanning sequence 440 / 688
## Scanning sequence 441 / 688
## Scanning sequence 442 / 688
## Scanning sequence 443 / 688
## Scanning sequence 444 / 688
## Scanning sequence 445 / 688
## Scanning sequence 446 / 688
## Scanning sequence 447 / 688
## Scanning sequence 448 / 688
## Scanning sequence 449 / 688
## Scanning sequence 450 / 688
## Scanning sequence 451 / 688
## Scanning sequence 452 / 688
## Scanning sequence 453 / 688
## Scanning sequence 454 / 688
## Scanning sequence 455 / 688
## Scanning sequence 456 / 688
## Scanning sequence 457 / 688
## Scanning sequence 458 / 688
## Scanning sequence 459 / 688
## Scanning sequence 460 / 688
## Scanning sequence 461 / 688
## Scanning sequence 462 / 688
## Scanning sequence 463 / 688
## Scanning sequence 464 / 688
## Scanning sequence 465 / 688
## Scanning sequence 466 / 688
## Scanning sequence 467 / 688
## Scanning sequence 468 / 688
## Scanning sequence 469 / 688
## Scanning sequence 470 / 688
## Scanning sequence 471 / 688
## Scanning sequence 472 / 688
## Scanning sequence 473 / 688
## Scanning sequence 474 / 688
## Scanning sequence 475 / 688
## Scanning sequence 476 / 688
## Scanning sequence 477 / 688
## Scanning sequence 478 / 688
## Scanning sequence 479 / 688
## Scanning sequence 480 / 688
## Scanning sequence 481 / 688
## Scanning sequence 482 / 688
## Scanning sequence 483 / 688
## Scanning sequence 484 / 688
## Scanning sequence 485 / 688
## Scanning sequence 486 / 688
## Scanning sequence 487 / 688
## Scanning sequence 488 / 688
## Scanning sequence 489 / 688
## Scanning sequence 490 / 688
## Scanning sequence 491 / 688
## Scanning sequence 492 / 688
## Scanning sequence 493 / 688
## Scanning sequence 494 / 688
## Scanning sequence 495 / 688
## Scanning sequence 496 / 688
## Scanning sequence 497 / 688
## Scanning sequence 498 / 688
## Scanning sequence 499 / 688
## Scanning sequence 500 / 688
## Scanning sequence 501 / 688
## Scanning sequence 502 / 688
## Scanning sequence 503 / 688
## Scanning sequence 504 / 688
## Scanning sequence 505 / 688
## Scanning sequence 506 / 688
## Scanning sequence 507 / 688
## Scanning sequence 508 / 688
## Scanning sequence 509 / 688
## Scanning sequence 510 / 688
## Scanning sequence 511 / 688
## Scanning sequence 512 / 688
## Scanning sequence 513 / 688
## Scanning sequence 514 / 688
## Scanning sequence 515 / 688
## Scanning sequence 516 / 688
## Scanning sequence 517 / 688
## Scanning sequence 518 / 688
## Scanning sequence 519 / 688
## Scanning sequence 520 / 688
## Scanning sequence 521 / 688
## Scanning sequence 522 / 688
## Scanning sequence 523 / 688
## Scanning sequence 524 / 688
## Scanning sequence 525 / 688
## Scanning sequence 526 / 688
## Scanning sequence 527 / 688
## Scanning sequence 528 / 688
## Scanning sequence 529 / 688
## Scanning sequence 530 / 688
## Scanning sequence 531 / 688
## Scanning sequence 532 / 688
## Scanning sequence 533 / 688
## Scanning sequence 534 / 688
## Scanning sequence 535 / 688
## Scanning sequence 536 / 688
## Scanning sequence 537 / 688
## Scanning sequence 538 / 688
## Scanning sequence 539 / 688
## Scanning sequence 540 / 688
## Scanning sequence 541 / 688
## Scanning sequence 542 / 688
## Scanning sequence 543 / 688
## Scanning sequence 544 / 688
## Scanning sequence 545 / 688
## Scanning sequence 546 / 688
## Scanning sequence 547 / 688
## Scanning sequence 548 / 688
## Scanning sequence 549 / 688
## Scanning sequence 550 / 688
## Scanning sequence 551 / 688
## Scanning sequence 552 / 688
## Scanning sequence 553 / 688
## Scanning sequence 554 / 688
## Scanning sequence 555 / 688
## Scanning sequence 556 / 688
## Scanning sequence 557 / 688
## Scanning sequence 558 / 688
## Scanning sequence 559 / 688
## Scanning sequence 560 / 688
## Scanning sequence 561 / 688
## Scanning sequence 562 / 688
## Scanning sequence 563 / 688
## Scanning sequence 564 / 688
## Scanning sequence 565 / 688
## Scanning sequence 566 / 688
## Scanning sequence 567 / 688
## Scanning sequence 568 / 688
## Scanning sequence 569 / 688
## Scanning sequence 570 / 688
## Scanning sequence 571 / 688
## Scanning sequence 572 / 688
## Scanning sequence 573 / 688
## Scanning sequence 574 / 688
## Scanning sequence 575 / 688
## Scanning sequence 576 / 688
## Scanning sequence 577 / 688
## Scanning sequence 578 / 688
## Scanning sequence 579 / 688
## Scanning sequence 580 / 688
## Scanning sequence 581 / 688
## Scanning sequence 582 / 688
## Scanning sequence 583 / 688
## Scanning sequence 584 / 688
## Scanning sequence 585 / 688
## Scanning sequence 586 / 688
## Scanning sequence 587 / 688
## Scanning sequence 588 / 688
## Scanning sequence 589 / 688
## Scanning sequence 590 / 688
## Scanning sequence 591 / 688
## Scanning sequence 592 / 688
## Scanning sequence 593 / 688
## Scanning sequence 594 / 688
## Scanning sequence 595 / 688
## Scanning sequence 596 / 688
## Scanning sequence 597 / 688
## Scanning sequence 598 / 688
## Scanning sequence 599 / 688
## Scanning sequence 600 / 688
## Scanning sequence 601 / 688
## Scanning sequence 602 / 688
## Scanning sequence 603 / 688
## Scanning sequence 604 / 688
## Scanning sequence 605 / 688
## Scanning sequence 606 / 688
## Scanning sequence 607 / 688
## Scanning sequence 608 / 688
## Scanning sequence 609 / 688
## Scanning sequence 610 / 688
## Scanning sequence 611 / 688
## Scanning sequence 612 / 688
## Scanning sequence 613 / 688
## Scanning sequence 614 / 688
## Scanning sequence 615 / 688
## Scanning sequence 616 / 688
## Scanning sequence 617 / 688
## Scanning sequence 618 / 688
## Scanning sequence 619 / 688
## Scanning sequence 620 / 688
## Scanning sequence 621 / 688
## Scanning sequence 622 / 688
## Scanning sequence 623 / 688
## Scanning sequence 624 / 688
## Scanning sequence 625 / 688
## Scanning sequence 626 / 688
## Scanning sequence 627 / 688
## Scanning sequence 628 / 688
## Scanning sequence 629 / 688
## Scanning sequence 630 / 688
## Scanning sequence 631 / 688
## Scanning sequence 632 / 688
## Scanning sequence 633 / 688
## Scanning sequence 634 / 688
## Scanning sequence 635 / 688
## Scanning sequence 636 / 688
## Scanning sequence 637 / 688
## Scanning sequence 638 / 688
## Scanning sequence 639 / 688
## Scanning sequence 640 / 688
## Scanning sequence 641 / 688
## Scanning sequence 642 / 688
## Scanning sequence 643 / 688
## Scanning sequence 644 / 688
## Scanning sequence 645 / 688
## Scanning sequence 646 / 688
## Scanning sequence 647 / 688
## Scanning sequence 648 / 688
## Scanning sequence 649 / 688
## Scanning sequence 650 / 688
## Scanning sequence 651 / 688
## Scanning sequence 652 / 688
## Scanning sequence 653 / 688
## Scanning sequence 654 / 688
## Scanning sequence 655 / 688
## Scanning sequence 656 / 688
## Scanning sequence 657 / 688
## Scanning sequence 658 / 688
## Scanning sequence 659 / 688
## Scanning sequence 660 / 688
## Scanning sequence 661 / 688
## Scanning sequence 662 / 688
## Scanning sequence 663 / 688
## Scanning sequence 664 / 688
## Scanning sequence 665 / 688
## Scanning sequence 666 / 688
## Scanning sequence 667 / 688
## Scanning sequence 668 / 688
## Scanning sequence 669 / 688
## Scanning sequence 670 / 688
## Scanning sequence 671 / 688
## Scanning sequence 672 / 688
## Scanning sequence 673 / 688
## Scanning sequence 674 / 688
## Scanning sequence 675 / 688
## Scanning sequence 676 / 688
## Scanning sequence 677 / 688
## Scanning sequence 678 / 688
## Scanning sequence 679 / 688
## Scanning sequence 680 / 688
## Scanning sequence 681 / 688
## Scanning sequence 682 / 688
## Scanning sequence 683 / 688
## Scanning sequence 684 / 688
## Scanning sequence 685 / 688
## Scanning sequence 686 / 688
## Scanning sequence 687 / 688
## Scanning sequence 688 / 688
# Fraction of rows of `score` whose summed score is positive.
# Divide by nrow(score) rather than length(score): length() counts every cell
# of the matrix, so it only equals the number of rows when there is a single
# column. (Assumes one row per scanned sequence -- TODO confirm.)
length(which(apply(score, 1, sum) > 0)) / nrow(score)
## [1] 0.9287791
# We count the promoters that have at least one score above the threshold, then divide by the total number of promoters to get the frequency
0.9287791 is the result, indicating that the selected TF can bind almost all the promoters with a good affinity, as we expected since we chose the top enriched TF.
We created a .txt file containing the gene names of all the up-regulated genes identified in the previous steps. We then uploaded the file to the STRING tool to build a network. Finally, we downloaded the output in .tsv format; this file contains all the information on the edges found between the input genes (i.e., our nodes).
# Write the unique up-regulated gene names to 'UPDEGs.txt', one per line,
# quoted, without header or row names.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable names.
write.table(
  unique(UPDegs$external_gene_name.x),
  file = 'UPDEGs.txt',
  sep = '\t',
  quote = TRUE,
  row.names = FALSE,
  col.names = FALSE
)
We first created the node annotations using biomaRt. We filtered on the Ensembl gene IDs of the up-regulated genes and selected as attributes the gene name, the gene ID, the description, the biotype, the start and end position, the chromosome name, and the strand. We then reduced the node table, removing duplicated rows. We imported the information obtained from STRING and filtered it, keeping only the edges between nodes that were present in our node variable. Similarly, we removed the nodes that do not appear in the link variable. We finally constructed the network using the igraph library, specifically the function graph_from_data_frame, defining the link variable as edges and the nodes variable as vertices. The largest connected component was identified using the igraph function components(), which lets us specify whether we are looking for weakly or strongly connected components. It returns the cluster membership of each node, from which the largest cluster can be identified. Here we found cluster 1 to be the largest, with a total of 510 nodes; we extracted it from the total graph using the function induced_subgraph, specifying the nodes to extract. Since the total graph and the subgraph are both large graphs with >500 nodes, we decided to plot them using the ggnet2 function, part of the GGally package. We also decided to resize and color the nodes based on their out-degree (the number of outgoing edges).
library(igraph)
##
## Attaching package: 'igraph'
## The following object is masked from 'package:Biostrings':
##
## union
## The following object is masked from 'package:XVector':
##
## path
## The following object is masked from 'package:GenomicRanges':
##
## union
## The following object is masked from 'package:IRanges':
##
## union
## The following object is masked from 'package:S4Vectors':
##
## union
## The following objects are masked from 'package:BiocGenerics':
##
## normalize, path, union
## The following object is masked from 'package:clusterProfiler':
##
## simplify
## The following objects are masked from 'package:lubridate':
##
## %--%, union
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
## The following objects are masked from 'package:purrr':
##
## compose, simplify
## The following object is masked from 'package:tidyr':
##
## crossing
## The following object is masked from 'package:tibble':
##
## as_data_frame
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
# we use the general deg
# Build the interaction network of the up-regulated genes.
# We use the general DEG table (UPDegs) to query the node annotations.
nodes <- getBM(
  attributes = c(
    'external_gene_name', 'ensembl_gene_id', 'description', 'gene_biotype',
    'start_position', 'end_position', 'chromosome_name', 'strand'
  ),
  filters = 'ensembl_gene_id',
  values = UPDegs$ensembl_gene_id,
  mart = ensembl
)
# Keep column 1 plus columns 3 to 6 (selected by name instead of position)
# and drop duplicated rows. The first column becomes the vertex name.
nodes <- unique(nodes[, c(
  'external_gene_name', 'description', 'gene_biotype',
  'start_position', 'end_position'
)])
# Edge list exported from STRING (keep this file up to date with the gene list).
link <- read.delim('string_interactions.tsv')
# Keep only edges whose two endpoints are both annotated nodes.
link <- link %>%
  filter(
    X.node1 %in% nodes$external_gene_name &
      node2 %in% nodes$external_gene_name
  )
# Keep only nodes that take part in at least one remaining edge.
nodes <- nodes %>%
  filter(external_gene_name %in% c(link$node2, link$X.node1))
# Undirected graph: edges come from `link`, vertex attributes from `nodes`.
net <- graph_from_data_frame(d = link, directed = FALSE, vertices = nodes)
net
## IGRAPH 1f6ce73 UN-- 522 3682 --
## + attr: name (v/c), description (v/c), gene_biotype (v/c),
## | start_position (v/n), end_position (v/n), node1_string_id (e/c),
## | node2_string_id (e/c), neighborhood_on_chromosome (e/n), gene_fusion
## | (e/n), phylogenetic_cooccurrence (e/n), homology (e/n), coexpression
## | (e/n), experimentally_determined_interaction (e/n),
## | database_annotated (e/n), automated_textmining (e/n), combined_score
## | (e/n)
## + edges from 1f6ce73 (vertex names):
## [1] SLCO4A1--ABCC3 RXRG --ABTB2 ABTB2 --SPTBN2 FSTL3 --ADAM12
## [5] TIMP1 --ADAM12 COL1A1 --ADAM12 MMP7 --ADAM12 ADAM12 --COL1A2
## + ... omitted several edges
# Plot the whole network and compute its connected components.
# Simplify first (igraph::simplify drops multi-edges and self-loops by
# default) so the degree used for the colours is computed on the same graph
# that is drawn, as is done for the largest-component sub-graph.
# The igraph:: prefix is explicit because `simplify` is masked across packages.
net <- igraph::simplify(net)
deg_net <- igraph::degree(net, mode = 'out')
# Colour class per node: degree capped to the range 0..4.
clrvek_net <- pmax(0, pmin(deg_net, 4))
clrname_net <- c(
  '0' = 'lightgray', '1' = '#FF1744', '2' = '#F0A830',
  '3' = '#A155B9', '4' = '#165BAA'
)
ggnet2(
  net,
  size = 'outdegree', max_size = 3, size.cut = 3,
  color = clrvek_net, palette = clrname_net, color.legend = 'clr~degree',
  label = TRUE, label.size = 2,
  legend.position = 'bottom'
)
# Connected components of the network. The result is stored in `c` because
# the sub-graph extraction further down reads c$membership.
c <- igraph::components(net, mode = 'strong')
c
## $membership
## SEMA3F AOC1 PLXND1 ITGA3 CRLF1 TNFRSF12A ALDH3B1
## 1 1 1 1 1 1 1
## GGCT TENM1 MLXIPL MRC2 PLAUR ALOX5 MAMLD1
## 1 1 1 1 1 1 1
## BID MARCO CYP24A1 NRXN3 DEPDC1B DAPK2 VCAN
## 1 1 1 1 1 1 1
## RAB27B TNC LTBP1 NFE2L3 ENTPD2 CBLN4 PKP2
## 1 1 1 1 1 1 1
## PRDM1 RASGRF1 RIMBP2 COL11A1 BCAT1 LZTS1 CDH3
## 1 1 1 1 1 1 1
## NGFR ERBB3 NGEF CACNB1 SYT1 RAB27A FSTL3
## 1 1 1 1 1 1 1
## TRPC5 P4HA2 TP63 IGF2BP2 CA12 CACNG4 FAP
## 1 1 1 1 1 1 1
## BRINP1 PGR FOLH1 CEACAM6 FAT2 NOX4 ADAMTS2
## 1 1 1 1 1 1 1
## SULT2B1 FXYD5 ICAM1 NRCAM TGM1 TYRO3 TBX15
## 1 1 1 1 1 1 1
## PALM MMP11 LGALS1 KDELR3 RASD2 SLCO4A1 BIRC7
## 1 1 1 1 1 1 1
## EEF1A2 TRIB3 E2F1 CST4 CELF4 PCSK1N PHEX
## 1 1 1 1 1 1 1
## TIMP1 PIN4 TSC22D1 CBLN1 CCL22 CCL17 CTSH
## 1 1 1 1 1 1 1
## RHOV MYEF2 TUSC3 KCNN4 CLIP3 BBC3 ICAM4
## 1 2 1 1 1 1 1
## ICAM5 SIGLEC6 PDE4C ISYNA1 COMP MAG TMEM59L
## 1 1 1 1 1 1 1
## HPN SCN1B MET NOD1 AGR2 CORO2A TGFBR1
## 1 1 1 1 1 3 1
## NPDC1 UNC5B SPOCK2 WNT3 COL1A1 ABCC3 FAM20A
## 1 1 1 1 1 1 1
## NMU GALNT7 GLRB CTSC B3GAT1 DTX4 CCND1
## 1 1 1 1 1 1 1
## MDK ADTRP TPD52L1 PERP MDFI THBS4 CDH6
## 1 1 1 1 1 1 1
## SLC27A6 FGF1 CLDN16 COL7A1 SLC30A3 FNDC4 EVA1A
## 1 1 1 1 1 1 1
## FN1 IGFBP2 GNLY PASK CD207 QSOX1 HPCAL4
## 1 1 1 1 1 1 1
## ST6GALNAC5 MFAP2 ECE1 GALE NRP2 B4GALT6 NPC2
## 1 1 1 1 1 1 1
## IQSEC3 TGFBI DUSP4 CLU SFTPA1 PLAU COL10A1
## 1 1 1 1 1 1 1
## G0S2 KCNJ2 SLPI SDC4 PSG8 GRM4 TREM1
## 1 1 1 1 1 1 1
## SOX4 RUNX2 MYRF TMEM255A C3 PCSK2 LAMP5
## 1 1 1 1 1 1 1
## CITED1 MGAT3 KRT17 KLK10 APOE PXDN GDF15
## 1 1 1 1 1 1 1
## COL5A1 HIGD1B NAPSA PDLIM4 PPP1R1B LGALS3 RAMP1
## 1 1 1 1 1 1 1
## PTPRE ITGB4 TRIM47 DLG4 VSTM2L CHI3L1 POSTN
## 1 1 4 1 1 1 1
## KL LOXL2 LRP4 DSC3 APLNR C5AR2 CLDN10
## 1 1 1 1 1 1 1
## HRK NT5E GLS2 RDH5 HEY2 WNT10A PLXNC1
## 1 1 1 1 1 1 1
## SCEL RAPGEF5 MYO1G ADAMTS7 GALNT5 GABBR2 FGFBP1
## 1 1 3 1 1 1 1
## TMPRSS4 MMP7 TRIM29 POU2F3 ITGA11 STRA6 CYP1B1
## 1 1 1 1 1 1 1
## TRIM54 LOXL4 DUSP5 ENTPD1 ADAMTS14 HCN4 PDE5A
## 4 1 1 1 1 1 1
## B4GALNT3 ERP27 PIANP AMIGO2 DUSP6 NTRK3 MFGE8
## 1 1 1 1 1 1 1
## CDH11 CDH13 TMC6 PMAIP1 GRB7 FCN3 SYTL1
## 1 1 1 1 1 1 1
## CYP4B1 RXRG CRABP2 XPR1 ECM1 SUSD4 ETNK2
## 1 1 1 1 1 1 5
## HES6 STAC IL17RD TMEM108 HAPLN1 CXCL14 GABRB2
## 1 1 1 1 1 1 1
## TNFRSF21 IGFBP3 SYTL5 IGSF1 ARHGAP36 CDKN2B CDKN2A
## 1 1 1 1 1 1 1
## SHC3 LCN2 ADAM12 ADM MPZL2 CDH22 HMGA2
## 1 1 1 1 1 1 1
## MKX PRSS23 THRSP ADAMTS12 EPS8 GLT1D1 TMEM132D
## 1 1 1 1 1 6 6
## TMEM163 CYSLTR2 SPOCK1 ENPP3 PDE1C ADCY8 MMP16
## 1 1 1 1 1 1 1
## TIAM1 GDF6 CD109 B3GNT7 SLC34A2 GRHL3 XDH
## 1 1 1 1 1 1 1
## SHROOM4 CD1A CDA LAD1 TNNI1 RUNX1 PDE9A
## 1 1 1 1 1 1 1
## S100B S100A1 LY6E SCGB3A1 PLXDC1 PLCD3 AQP5
## 1 1 1 1 1 1 1
## PDZK1IP1 DHRS3 ALPL MXRA8 DIRAS3 KLHDC8A SPATA18
## 1 1 1 1 1 1 1
## INHBB BNIPL CDC42EP3 S100A11 IVL S100A9 TGFA
## 1 1 1 1 1 1 1
## CLDN1 ELF3 ADORA1 ADAMTS9 GRIK3 LIPH MST1R
## 1 1 1 1 1 1 1
## PRSS12 ITGA2 EDIL3 ESM1 FOXQ1 TMEM200A COL1A2
## 1 1 1 1 1 1 1
## CTHRC1 DCSTAMP CLDN2 RET PDZRN4 ABTB2 SPINT1
## 1 1 1 1 1 1 1
## PLD4 TMEM130 BEAN1 ANPEP SCG5 PIP5KL1 CDT1
## 1 1 1 1 1 1 1
## CYP2S1 CDC42EP5 SEMA6B GGT6 KLK6 KLK11 KRT80
## 1 1 1 1 1 1 1
## ANGPTL4 IGFBP6 BMP1 PHYHIP COL3A1 SFTPB KLK7
## 1 1 1 1 1 1 1
## ATF5 CPT1C STK32A CXCL8 TM4SF4 TM4SF1 ADRA1B
## 1 1 1 1 1 1 1
## CST2 CST1 SEMA3E LONRF2 CDH2 KCNS3 HS6ST2
## 1 1 1 1 1 1 1
## LRG1 KRT19 KRT15 APLN ENC1 P2RY6 COL8A2
## 1 1 1 1 1 1 1
## FRMD5 GAP43 FRMD3 TPSAB1 MACROD2 ZMAT3 SYT12
## 1 1 1 1 1 1 1
## SAA1 TNFRSF10C CSPG4 PC C11orf80 SPTBN2 PHLDA3
## 1 1 1 1 6 1 1
## ZCCHC12 GOLT1A RIN1 SEZ6L2 CHST2 CST6 LPL
## 1 5 1 1 1 1 1
## SFN ETV4 CALCB DOK7 RIMS2 FUT2 B3GNT8
## 1 1 1 1 1 1 1
## CTXN1 CDH4 FJX1 SLITRK4 ALOX15B FCER1A B3GNT3
## 1 1 1 1 1 1 1
## F2R PENK CCL13 TNFSF15 PHLDA2 PLAG1 GABRG3
## 1 1 1 1 1 1 1
## EPHB3 NTM C1orf116 GJC1 CSMD1 EPHA10 UPP1
## 1 1 1 1 1 1 1
## OPCML MACC1 IQGAP3 KCNQ3 ADRA2C ALDH1A3 TACSTD2
## 1 1 1 1 1 1 1
## PTP4A3 PROS1 LPAR5 NELL2 JAG2 MANEAL SFTPA2
## 1 1 1 1 1 1 1
## ROR1 MUC1 AHNAK2 NAT8L THBS2 GLDN ESPN
## 1 1 1 1 1 1 1
## TMPRSS6 SLIT1 GJA4 PLEKHN1 PERM1 GABRD TPRG1
## 1 1 1 1 1 1 1
## TMEM215 SLC24A5 IER5L ASAH2 CC2D2B SHISA6 ENTPD8
## 1 2 1 1 1 1 1
## LRRK2 GJB3 FAM111B CXCL17 IL1RAP S100A4 GREB1
## 1 1 1 1 1 1 1
## RYR1 CD55 CPNE4 S100A5 HRH1 S100A2 LAMB3
## 1 1 1 1 1 1 1
## PDLIM7 SIGLEC15 CYSRT1 SERPINA1 TPSB2 DCHS2 COL13A1
## 1 1 1 1 1 1 1
## DPP4 S100A6 C2CD4A F5 PNP PAX9 DMD
## 1 1 1 1 1 1 1
## TGM2 INF2 NHSL2 COL5A2 CDSN PSORS1C1 MUC21
## 1 3 1 1 1 1 1
## HLA-G CD177 PRSS1 CFI C2CD4B CLDN9 PLXNA4
## 1 1 1 1 1 1 1
## NPTXR ZNF469 ELFN1 PSG1 HLA-DQB2 C1orf226 C4orf48
## 1 1 1 1 1 1 1
## ETV5 PTCHD4 GPR162 NOX5 HP TUBB3 SLC22A31
## 1 1 1 1 1 1 1
## EPPK1 SPON1 ARHGAP23 SRCIN1
## 1 1 1 1
##
## $csize
## [1] 510 2 3 2 2 3
##
## $no
## [1] 6
# Extract and plot the largest connected component.
# The component is selected by size (c$csize) instead of assuming that the
# largest one is always numbered 1.
largest_comp <- which.max(c$csize)
net.c <- induced_subgraph(net, V(net)[which(c$membership == largest_comp)])
# The igraph:: prefix is explicit because `simplify` is masked across packages.
net.c <- igraph::simplify(net.c)
deg <- igraph::degree(net.c, mode = 'out')
# Colour class per node: degree capped to the range 0..4.
clrvek <- pmax(0, pmin(deg, 4))
clrname <- c(
  '0' = 'lightgray', '1' = '#FF1744', '2' = '#F0A830',
  '3' = '#A155B9', '4' = '#165BAA'
)
ggnet2(
  net.c,
  size = 'outdegree', max_size = 4, size.cut = 5,
  color = clrvek, palette = clrname, color.legend = 'clr~degree',
  label = TRUE, label.size = 2,
  legend.position = 'bottom'
)
Everything can be found at: https://github.com/iamandreatonina/Bioinformatics_Romanel_project-